[PATCH v2 0/4] support memory failure

v1->v2: Separate the 'all in one' patch into 4 patches. Use a 'flags' field with bit definitions instead of 'action_required' & 'recursive' for extensibility. Queue the event directly without an internal job. Add the full test method to the commit message. v1: Since QEMU 5.2 (commit-77b285f7f6), QEMU supports the 'memory failure' event and posts the event to the monitor when it hits a hardware memory error. zhenwei pi (4): API: introduce memory failure qemu: process: implement domainMemoryFailure qemu: monitor: handle memory failure event virsh: implement memory failure event examples/c/misc/event-test.c | 16 ++++++++ include/libvirt/libvirt-domain.h | 82 +++++++++++++++++++++++++++++++++++++ src/conf/domain_event.c | 80 ++++++++++++++++++++++++++++++++++++ src/conf/domain_event.h | 12 ++++++ src/libvirt_private.syms | 2 + src/qemu/qemu_monitor.c | 21 +++++++++- src/qemu/qemu_monitor.h | 39 ++++++++++++++++++ src/qemu/qemu_monitor_json.c | 49 ++++++++++++++++++++++ src/qemu/qemu_process.c | 59 ++++++++++++++++++++++++++ src/remote/remote_daemon_dispatch.c | 32 +++++++++++++++ src/remote/remote_driver.c | 32 +++++++++++++++ src/remote/remote_protocol.x | 16 +++++++- src/remote_protocol-structs | 8 ++++ tools/virsh-domain.c | 40 ++++++++++++++++++ 14 files changed, 486 insertions(+), 2 deletions(-) -- 2.11.0

Introduce the memory failure event. Libvirt should monitor the domain's events and then post them to the upper layer. Based on the hardware memory corruption message, the cloud scheduler could migrate the domain to another healthy physical server. Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- include/libvirt/libvirt-domain.h | 82 +++++++++++++++++++++++++++++++++++++ src/conf/domain_event.c | 80 ++++++++++++++++++++++++++++++++++++ src/conf/domain_event.h | 12 ++++++ src/libvirt_private.syms | 2 + src/remote/remote_daemon_dispatch.c | 32 +++++++++++++++ src/remote/remote_driver.c | 32 +++++++++++++++ src/remote/remote_protocol.x | 16 +++++++- src/remote_protocol-structs | 8 ++++ 8 files changed, 263 insertions(+), 1 deletion(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 77f9116675..5138843a56 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -3196,6 +3196,64 @@ typedef enum { } virDomainEventCrashedDetailType; /** + * virDomainMemoryFailureRecipientType: + * + * Recipient of a memory failure event. + */ +typedef enum { + /* memory failure at hypervisor memory address space */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR = 0, + + /* memory failure at guest memory address space */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST = 1, + +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST +# endif +} virDomainMemoryFailureRecipientType; + + +/** + * virDomainMemoryFailureActionType: + * + * Action of a memory failure event. + */ +typedef enum { + /* the memory failure could be ignored. This will only be the case for + * action-optional failures. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE = 0, + + /* memory failure occurred in guest memory, the guest enabled MCE handling + * mechanism, and hypervisor could inject the MCE into the guest + * successfully. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT = 1, + + /* the failure is unrecoverable. 
This occurs for action-required failures + * if the recipient is the hypervisor; hypervisor will exit. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL = 2, + + /* the failure is unrecoverable but confined to the guest. This occurs if + * the recipient is a guest which is not ready to handle memory failures. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET = 3, + +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST +# endif +} virDomainMemoryFailureActionType; + + +typedef enum { + /* whether a memory failure event is action-required or action-optional + * (e.g. a failure during memory scrub). */ + VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED = (1 << 0), + + /* whether the failure occurred while the previous failure was still in + * progress. */ + VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE = (1 << 1), +} virDomainMemoryFailureFlags; + + +/** * virConnectDomainEventCallback: * @conn: virConnect connection * @dom: The domain on which the event occurred @@ -4565,6 +4623,29 @@ typedef void (*virConnectDomainEventBlockThresholdCallback)(virConnectPtr conn, void *opaque); /** + * virConnectDomainEventMemoryFailureCallback: + * @conn: connection object + * @dom: domain on which the event occurred + * @recipient: the recipient of hardware memory failure + * @action: the action of hardware memory failure + * @flags: the flags of hardware memory failure + * @opaque: application specified data + * + * The callback occurs when the hypervisor handles the hardware memory + * corrupted event. 
+ * + * The callback signature to use when registering for an event of type + * VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE with virConnectDomainEventRegisterAny() + */ +typedef void (*virConnectDomainEventMemoryFailureCallback)(virConnectPtr conn, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque); + + +/** * VIR_DOMAIN_EVENT_CALLBACK: * * Used to cast the event specific callback into the generic one @@ -4606,6 +4687,7 @@ typedef enum { VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED = 22, /* virConnectDomainEventDeviceRemovalFailedCallback */ VIR_DOMAIN_EVENT_ID_METADATA_CHANGE = 23, /* virConnectDomainEventMetadataChangeCallback */ VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD = 24, /* virConnectDomainEventBlockThresholdCallback */ + VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE = 25, /* virConnectDomainEventMemoryFailureCallback */ # ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_EVENT_ID_LAST diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c index a8bd9f1595..4a6051a6ab 100644 --- a/src/conf/domain_event.c +++ b/src/conf/domain_event.c @@ -57,6 +57,7 @@ static virClassPtr virDomainEventJobCompletedClass; static virClassPtr virDomainEventDeviceRemovalFailedClass; static virClassPtr virDomainEventMetadataChangeClass; static virClassPtr virDomainEventBlockThresholdClass; +static virClassPtr virDomainEventMemoryFailureClass; static void virDomainEventDispose(void *obj); static void virDomainEventLifecycleDispose(void *obj); @@ -79,6 +80,7 @@ static void virDomainEventJobCompletedDispose(void *obj); static void virDomainEventDeviceRemovalFailedDispose(void *obj); static void virDomainEventMetadataChangeDispose(void *obj); static void virDomainEventBlockThresholdDispose(void *obj); +static void virDomainEventMemoryFailureDispose(void *obj); static void virDomainEventDispatchDefaultFunc(virConnectPtr conn, @@ -287,6 +289,15 @@ struct _virDomainEventBlockThreshold { typedef struct 
_virDomainEventBlockThreshold virDomainEventBlockThreshold; typedef virDomainEventBlockThreshold *virDomainEventBlockThresholdPtr; +struct _virDomainEventMemoryFailure { + virDomainEvent parent; + + virDomainMemoryFailureRecipientType recipient; + virDomainMemoryFailureActionType action; + unsigned int flags; +}; +typedef struct _virDomainEventMemoryFailure virDomainEventMemoryFailure; +typedef virDomainEventMemoryFailure *virDomainEventMemoryFailurePtr; static int virDomainEventsOnceInit(void) @@ -333,6 +344,8 @@ virDomainEventsOnceInit(void) return -1; if (!VIR_CLASS_NEW(virDomainEventBlockThreshold, virDomainEventClass)) return -1; + if (!VIR_CLASS_NEW(virDomainEventMemoryFailure, virDomainEventClass)) + return -1; return 0; } @@ -542,6 +555,14 @@ virDomainEventBlockThresholdDispose(void *obj) } +static void +virDomainEventMemoryFailureDispose(void *obj) +{ + virDomainEventMemoryFailurePtr event = obj; + VIR_DEBUG("obj=%p", event); +} + + static void * virDomainEventNew(virClassPtr klass, int eventID, @@ -1619,6 +1640,52 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom, } +static virObjectEventPtr +virDomainEventMemoryFailureNew(int id, + const char *name, + unsigned char *uuid, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + virDomainEventMemoryFailurePtr ev; + + if (virDomainEventsInitialize() < 0) + return NULL; + + if (!(ev = virDomainEventNew(virDomainEventMemoryFailureClass, + VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE, + id, name, uuid))) + return NULL; + + ev->recipient = recipient; + ev->action = action; + ev->flags = flags; + + return (virObjectEventPtr)ev; +} + +virObjectEventPtr +virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + return virDomainEventMemoryFailureNew(obj->def->id, obj->def->name, + obj->def->uuid, recipient, action, + flags); +} + 
+virObjectEventPtr +virDomainEventMemoryFailureNewFromDom(virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + return virDomainEventMemoryFailureNew(dom->id, dom->name, dom->uuid, + recipient, action, flags); +} + static void virDomainEventDispatchDefaultFunc(virConnectPtr conn, virObjectEventPtr event, @@ -1902,6 +1969,19 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn, cbopaque); goto cleanup; } + case VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE: + { + virDomainEventMemoryFailurePtr memoryFailureEvent; + + memoryFailureEvent = (virDomainEventMemoryFailurePtr)event; + ((virConnectDomainEventMemoryFailureCallback)cb)(conn, dom, + memoryFailureEvent->recipient, + memoryFailureEvent->action, + memoryFailureEvent->flags, + cbopaque); + goto cleanup; + } + case VIR_DOMAIN_EVENT_ID_LAST: break; } diff --git a/src/conf/domain_event.h b/src/conf/domain_event.h index d1cfb81d62..1d001e164e 100644 --- a/src/conf/domain_event.h +++ b/src/conf/domain_event.h @@ -255,6 +255,18 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom, unsigned long long threshold, unsigned long long excess); +virObjectEventPtr +virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags); + +virObjectEventPtr +virDomainEventMemoryFailureNewFromDom(virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags); + int virDomainEventStateRegister(virConnectPtr conn, virObjectEventStatePtr state, diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 152083d220..927de5001a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -704,6 +704,8 @@ virDomainEventLifecycleNew; virDomainEventLifecycleNewFromDef; virDomainEventLifecycleNewFromDom; virDomainEventLifecycleNewFromObj; 
+virDomainEventMemoryFailureNewFromDom; +virDomainEventMemoryFailureNewFromObj; virDomainEventMetadataChangeNewFromDom; virDomainEventMetadataChangeNewFromObj; virDomainEventMigrationIterationNewFromDom; diff --git a/src/remote/remote_daemon_dispatch.c b/src/remote/remote_daemon_dispatch.c index 32ebcd8f36..078467f8da 100644 --- a/src/remote/remote_daemon_dispatch.c +++ b/src/remote/remote_daemon_dispatch.c @@ -1302,6 +1302,37 @@ remoteRelayDomainEventBlockThreshold(virConnectPtr conn, } +static int +remoteRelayDomainEventMemoryFailure(virConnectPtr conn, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque) +{ + daemonClientEventCallbackPtr callback = opaque; + remote_domain_event_memory_failure_msg data; + + if (callback->callbackID < 0 || + !remoteRelayDomainEventCheckACL(callback->client, conn, dom)) + return -1; + + /* build return data */ + memset(&data, 0, sizeof(data)); + data.callbackID = callback->callbackID; + data.recipient = recipient; + data.action = action; + data.flags = flags; + make_nonnull_domain(&data.dom, dom); + + remoteDispatchObjectEventSend(callback->client, remoteProgram, + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE, + (xdrproc_t)xdr_remote_domain_event_memory_failure_msg, &data); + + return 0; +} + + static virConnectDomainEventGenericCallback domainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventLifecycle), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventReboot), @@ -1328,6 +1359,7 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceRemovalFailed), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMetadataChange), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventBlockThreshold), + VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMemoryFailure), }; G_STATIC_ASSERT(G_N_ELEMENTS(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST); diff --git 
a/src/remote/remote_driver.c b/src/remote/remote_driver.c index d318224605..9cd2fd36ae 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -405,6 +405,11 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog, void *evdata, void *opaque); static void +remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog, + virNetClientPtr client, + void *evdata, void *opaque); + +static void remoteConnectNotifyEventConnectionClosed(virNetClientProgramPtr prog G_GNUC_UNUSED, virNetClientPtr client G_GNUC_UNUSED, void *evdata, void *opaque); @@ -615,6 +620,10 @@ static virNetClientProgramEvent remoteEvents[] = { remoteDomainBuildEventBlockThreshold, sizeof(remote_domain_event_block_threshold_msg), (xdrproc_t)xdr_remote_domain_event_block_threshold_msg }, + { REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE, + remoteDomainBuildEventMemoryFailure, + sizeof(remote_domain_event_memory_failure_msg), + (xdrproc_t)xdr_remote_domain_event_memory_failure_msg }, }; static void @@ -5440,6 +5449,29 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog G_GNUC_UNUSED, } +static void +remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog G_GNUC_UNUSED, + virNetClientPtr client G_GNUC_UNUSED, + void *evdata, void *opaque) +{ + virConnectPtr conn = opaque; + remote_domain_event_memory_failure_msg *msg = evdata; + struct private_data *priv = conn->privateData; + virDomainPtr dom; + virObjectEventPtr event = NULL; + + if (!(dom = get_nonnull_domain(conn, msg->dom))) + return; + + event = virDomainEventMemoryFailureNewFromDom(dom, msg->recipient, + msg->action, msg->flags); + + virObjectUnref(dom); + + virObjectEventStateQueueRemote(priv->eventState, event, msg->callbackID); +} + + static int remoteStreamSend(virStreamPtr st, const char *data, diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x index f4d6147676..5e5e781e76 100644 --- a/src/remote/remote_protocol.x +++ b/src/remote/remote_protocol.x @@ -3469,6 +3469,14 @@ 
struct remote_domain_event_callback_metadata_change_msg { remote_string nsuri; }; +struct remote_domain_event_memory_failure_msg { + int callbackID; + remote_nonnull_domain dom; + int recipient; + int action; + unsigned int flags; +}; + struct remote_connect_secret_event_register_any_args { int eventID; remote_secret secret; @@ -6668,5 +6676,11 @@ enum remote_procedure { * @priority: high * @acl: domain:read */ - REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422 + REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422, + + /** + * @generate: both + * @acl: none + */ + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423 }; diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs index bae0f0b545..c2ae411885 100644 --- a/src/remote_protocol-structs +++ b/src/remote_protocol-structs @@ -2862,6 +2862,13 @@ struct remote_domain_event_callback_metadata_change_msg { int type; remote_string nsuri; }; +struct remote_domain_event_memory_failure_msg { + int callbackID; + remote_nonnull_domain dom; + int recipient; + int action; + u_int flags; +}; struct remote_connect_secret_event_register_any_args { int eventID; remote_secret secret; @@ -3558,4 +3565,5 @@ enum remote_procedure { REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420, REMOTE_PROC_DOMAIN_BACKUP_BEGIN = 421, REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422, + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423, }; -- 2.11.0

This patch failed to compile in my env: FAILED: tools/virsh.p/virsh-domain.c.o [....] -D_FUNCTION_DEF -MD -MQ tools/virsh.p/virsh-domain.c.o -MF tools/virsh.p/virsh-domain.c.o.d -o tools/virsh.p/virsh-domain.c.o -c ../tools/virsh-domain.c In file included from /usr/lib64/glib-2.0/include/glibconfig.h:9, from /usr/include/glib-2.0/glib/gtypes.h:32, from /usr/include/glib-2.0/glib/galloca.h:32, from /usr/include/glib-2.0/glib.h:30, from ../src/util/glibcompat.h:21, from ../src/internal.h:30, from ../tools/virsh.h:25, from ../tools/virsh-domain.h:23, from ../tools/virsh-domain.c:22: /usr/include/glib-2.0/glib/gmacros.h:745:53: error: size of array ‘_GStaticAssertCompileTimeAssertion_185’ is negative 745 | #define G_STATIC_ASSERT(expr) typedef char G_PASTE (_GStaticAssertCompileTimeAssertion_, __COUNTER__)[(expr) ? 1 : -1] G_GNUC_UNUSED | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/glib-2.0/glib/gmacros.h:735:47: note: in definition of macro ‘G_PASTE_ARGS’ 735 | #define G_PASTE_ARGS(identifier1,identifier2) identifier1 ## identifier2 | ^~~~~~~~~~~ /usr/include/glib-2.0/glib/gmacros.h:745:44: note: in expansion of macro ‘G_PASTE’ 745 | #define G_STATIC_ASSERT(expr) typedef char G_PASTE (_GStaticAssertCompileTimeAssertion_, __COUNTER__)[(expr) ? 1 : -1] G_GNUC_UNUSED | ^~~~~~~ ../tools/virsh-domain.c:13643:1: note: in expansion of macro ‘G_STATIC_ASSERT’ 13643 | G_STATIC_ASSERT(VIR_DOMAIN_EVENT_ID_LAST == G_N_ELEMENTS(virshDomainEventCallbacks)); | ^~~~~~~~~~~~~~~ [505/984] Compiling C object src/virtqemud.p/remote_remote_daemon_dispatch.c.o ninja: build stopped: subcommand failed. $ I didn't verify whether the following patches fix it. Thanks, DHB On 10/12/20 9:31 AM, zhenwei pi wrote:
Introduce the memory failure event. Libvirt should monitor the domain's events and then post them to the upper layer. Based on the hardware memory corruption message, the cloud scheduler could migrate the domain to another healthy physical server.
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- include/libvirt/libvirt-domain.h | 82 +++++++++++++++++++++++++++++++++++++ src/conf/domain_event.c | 80 ++++++++++++++++++++++++++++++++++++ src/conf/domain_event.h | 12 ++++++ src/libvirt_private.syms | 2 + src/remote/remote_daemon_dispatch.c | 32 +++++++++++++++ src/remote/remote_driver.c | 32 +++++++++++++++ src/remote/remote_protocol.x | 16 +++++++- src/remote_protocol-structs | 8 ++++ 8 files changed, 263 insertions(+), 1 deletion(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 77f9116675..5138843a56 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -3196,6 +3196,64 @@ typedef enum { } virDomainEventCrashedDetailType;
/** + * virDomainMemoryFailureRecipientType: + * + * Recipient of a memory failure event. + */ +typedef enum { + /* memory failure at hypervisor memory address space */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR = 0, + + /* memory failure at guest memory address space */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST = 1, + +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST +# endif +} virDomainMemoryFailureRecipientType; + + +/** + * virDomainMemoryFailureActionType: + * + * Action of a memory failure event. + */ +typedef enum { + /* the memory failure could be ignored. This will only be the case for + * action-optional failures. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE = 0, + + /* memory failure occurred in guest memory, the guest enabled MCE handling + * mechanism, and hypervisor could inject the MCE into the guest + * successfully. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT = 1, + + /* the failure is unrecoverable. This occurs for action-required failures + * if the recipient is the hypervisor; hypervisor will exit. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL = 2, + + /* the failure is unrecoverable but confined to the guest. This occurs if + * the recipient is a guest which is not ready to handle memory failures. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET = 3, + +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST +# endif +} virDomainMemoryFailureActionType; + + +typedef enum { + /* whether a memory failure event is action-required or action-optional + * (e.g. a failure during memory scrub). */ + VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED = (1 << 0), + + /* whether the failure occurred while the previous failure was still in + * progress. 
*/ + VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE = (1 << 1), +} virDomainMemoryFailureFlags; + + +/** * virConnectDomainEventCallback: * @conn: virConnect connection * @dom: The domain on which the event occurred @@ -4565,6 +4623,29 @@ typedef void (*virConnectDomainEventBlockThresholdCallback)(virConnectPtr conn, void *opaque);
/** + * virConnectDomainEventMemoryFailureCallback: + * @conn: connection object + * @dom: domain on which the event occurred + * @recipient: the recipient of hardware memory failure + * @action: the action of hardware memory failure + * @flags: the flags of hardware memory failure + * @opaque: application specified data + * + * The callback occurs when the hypervisor handles the hardware memory + * corrupted event. + * + * The callback signature to use when registering for an event of type + * VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE with virConnectDomainEventRegisterAny() + */ +typedef void (*virConnectDomainEventMemoryFailureCallback)(virConnectPtr conn, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque); + + +/** * VIR_DOMAIN_EVENT_CALLBACK: * * Used to cast the event specific callback into the generic one @@ -4606,6 +4687,7 @@ typedef enum { VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED = 22, /* virConnectDomainEventDeviceRemovalFailedCallback */ VIR_DOMAIN_EVENT_ID_METADATA_CHANGE = 23, /* virConnectDomainEventMetadataChangeCallback */ VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD = 24, /* virConnectDomainEventBlockThresholdCallback */ + VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE = 25, /* virConnectDomainEventMemoryFailureCallback */
# ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_EVENT_ID_LAST diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c index a8bd9f1595..4a6051a6ab 100644 --- a/src/conf/domain_event.c +++ b/src/conf/domain_event.c @@ -57,6 +57,7 @@ static virClassPtr virDomainEventJobCompletedClass; static virClassPtr virDomainEventDeviceRemovalFailedClass; static virClassPtr virDomainEventMetadataChangeClass; static virClassPtr virDomainEventBlockThresholdClass; +static virClassPtr virDomainEventMemoryFailureClass;
static void virDomainEventDispose(void *obj); static void virDomainEventLifecycleDispose(void *obj); @@ -79,6 +80,7 @@ static void virDomainEventJobCompletedDispose(void *obj); static void virDomainEventDeviceRemovalFailedDispose(void *obj); static void virDomainEventMetadataChangeDispose(void *obj); static void virDomainEventBlockThresholdDispose(void *obj); +static void virDomainEventMemoryFailureDispose(void *obj);
static void virDomainEventDispatchDefaultFunc(virConnectPtr conn, @@ -287,6 +289,15 @@ struct _virDomainEventBlockThreshold { typedef struct _virDomainEventBlockThreshold virDomainEventBlockThreshold; typedef virDomainEventBlockThreshold *virDomainEventBlockThresholdPtr;
+struct _virDomainEventMemoryFailure { + virDomainEvent parent; + + virDomainMemoryFailureRecipientType recipient; + virDomainMemoryFailureActionType action; + unsigned int flags; +}; +typedef struct _virDomainEventMemoryFailure virDomainEventMemoryFailure; +typedef virDomainEventMemoryFailure *virDomainEventMemoryFailurePtr;
static int virDomainEventsOnceInit(void) @@ -333,6 +344,8 @@ virDomainEventsOnceInit(void) return -1; if (!VIR_CLASS_NEW(virDomainEventBlockThreshold, virDomainEventClass)) return -1; + if (!VIR_CLASS_NEW(virDomainEventMemoryFailure, virDomainEventClass)) + return -1; return 0; }
@@ -542,6 +555,14 @@ virDomainEventBlockThresholdDispose(void *obj) }
+static void +virDomainEventMemoryFailureDispose(void *obj) +{ + virDomainEventMemoryFailurePtr event = obj; + VIR_DEBUG("obj=%p", event); +} + + static void * virDomainEventNew(virClassPtr klass, int eventID, @@ -1619,6 +1640,52 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom, }
+static virObjectEventPtr +virDomainEventMemoryFailureNew(int id, + const char *name, + unsigned char *uuid, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + virDomainEventMemoryFailurePtr ev; + + if (virDomainEventsInitialize() < 0) + return NULL; + + if (!(ev = virDomainEventNew(virDomainEventMemoryFailureClass, + VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE, + id, name, uuid))) + return NULL; + + ev->recipient = recipient; + ev->action = action; + ev->flags = flags; + + return (virObjectEventPtr)ev; +} + +virObjectEventPtr +virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + return virDomainEventMemoryFailureNew(obj->def->id, obj->def->name, + obj->def->uuid, recipient, action, + flags); +} + +virObjectEventPtr +virDomainEventMemoryFailureNewFromDom(virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + return virDomainEventMemoryFailureNew(dom->id, dom->name, dom->uuid, + recipient, action, flags); +} + static void virDomainEventDispatchDefaultFunc(virConnectPtr conn, virObjectEventPtr event, @@ -1902,6 +1969,19 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn, cbopaque); goto cleanup; } + case VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE: + { + virDomainEventMemoryFailurePtr memoryFailureEvent; + + memoryFailureEvent = (virDomainEventMemoryFailurePtr)event; + ((virConnectDomainEventMemoryFailureCallback)cb)(conn, dom, + memoryFailureEvent->recipient, + memoryFailureEvent->action, + memoryFailureEvent->flags, + cbopaque); + goto cleanup; + } + case VIR_DOMAIN_EVENT_ID_LAST: break; } diff --git a/src/conf/domain_event.h b/src/conf/domain_event.h index d1cfb81d62..1d001e164e 100644 --- a/src/conf/domain_event.h +++ b/src/conf/domain_event.h @@ -255,6 +255,18 @@ 
virDomainEventBlockThresholdNewFromDom(virDomainPtr dom, unsigned long long threshold, unsigned long long excess);
+virObjectEventPtr +virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags); + +virObjectEventPtr +virDomainEventMemoryFailureNewFromDom(virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags); + int virDomainEventStateRegister(virConnectPtr conn, virObjectEventStatePtr state, diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 152083d220..927de5001a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -704,6 +704,8 @@ virDomainEventLifecycleNew; virDomainEventLifecycleNewFromDef; virDomainEventLifecycleNewFromDom; virDomainEventLifecycleNewFromObj; +virDomainEventMemoryFailureNewFromDom; +virDomainEventMemoryFailureNewFromObj; virDomainEventMetadataChangeNewFromDom; virDomainEventMetadataChangeNewFromObj; virDomainEventMigrationIterationNewFromDom; diff --git a/src/remote/remote_daemon_dispatch.c b/src/remote/remote_daemon_dispatch.c index 32ebcd8f36..078467f8da 100644 --- a/src/remote/remote_daemon_dispatch.c +++ b/src/remote/remote_daemon_dispatch.c @@ -1302,6 +1302,37 @@ remoteRelayDomainEventBlockThreshold(virConnectPtr conn, }
+static int +remoteRelayDomainEventMemoryFailure(virConnectPtr conn, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque) +{ + daemonClientEventCallbackPtr callback = opaque; + remote_domain_event_memory_failure_msg data; + + if (callback->callbackID < 0 || + !remoteRelayDomainEventCheckACL(callback->client, conn, dom)) + return -1; + + /* build return data */ + memset(&data, 0, sizeof(data)); + data.callbackID = callback->callbackID; + data.recipient = recipient; + data.action = action; + data.flags = flags; + make_nonnull_domain(&data.dom, dom); + + remoteDispatchObjectEventSend(callback->client, remoteProgram, + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE, + (xdrproc_t)xdr_remote_domain_event_memory_failure_msg, &data); + + return 0; +} + + static virConnectDomainEventGenericCallback domainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventLifecycle), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventReboot), @@ -1328,6 +1359,7 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceRemovalFailed), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMetadataChange), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventBlockThreshold), + VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMemoryFailure), };
G_STATIC_ASSERT(G_N_ELEMENTS(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST); diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index d318224605..9cd2fd36ae 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -405,6 +405,11 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog, void *evdata, void *opaque);
static void +remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog, + virNetClientPtr client, + void *evdata, void *opaque); + +static void remoteConnectNotifyEventConnectionClosed(virNetClientProgramPtr prog G_GNUC_UNUSED, virNetClientPtr client G_GNUC_UNUSED, void *evdata, void *opaque); @@ -615,6 +620,10 @@ static virNetClientProgramEvent remoteEvents[] = { remoteDomainBuildEventBlockThreshold, sizeof(remote_domain_event_block_threshold_msg), (xdrproc_t)xdr_remote_domain_event_block_threshold_msg }, + { REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE, + remoteDomainBuildEventMemoryFailure, + sizeof(remote_domain_event_memory_failure_msg), + (xdrproc_t)xdr_remote_domain_event_memory_failure_msg }, };
static void @@ -5440,6 +5449,29 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog G_GNUC_UNUSED, }
+static void +remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog G_GNUC_UNUSED, + virNetClientPtr client G_GNUC_UNUSED, + void *evdata, void *opaque) +{ + virConnectPtr conn = opaque; + remote_domain_event_memory_failure_msg *msg = evdata; + struct private_data *priv = conn->privateData; + virDomainPtr dom; + virObjectEventPtr event = NULL; + + if (!(dom = get_nonnull_domain(conn, msg->dom))) + return; + + event = virDomainEventMemoryFailureNewFromDom(dom, msg->recipient, + msg->action, msg->flags); + + virObjectUnref(dom); + + virObjectEventStateQueueRemote(priv->eventState, event, msg->callbackID); +} + + static int remoteStreamSend(virStreamPtr st, const char *data, diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x index f4d6147676..5e5e781e76 100644 --- a/src/remote/remote_protocol.x +++ b/src/remote/remote_protocol.x @@ -3469,6 +3469,14 @@ struct remote_domain_event_callback_metadata_change_msg { remote_string nsuri; };
+struct remote_domain_event_memory_failure_msg { + int callbackID; + remote_nonnull_domain dom; + int recipient; + int action; + unsigned int flags; +}; + struct remote_connect_secret_event_register_any_args { int eventID; remote_secret secret; @@ -6668,5 +6676,11 @@ enum remote_procedure { * @priority: high * @acl: domain:read */ - REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422 + REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422, + + /** + * @generate: both + * @acl: none + */ + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423 }; diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs index bae0f0b545..c2ae411885 100644 --- a/src/remote_protocol-structs +++ b/src/remote_protocol-structs @@ -2862,6 +2862,13 @@ struct remote_domain_event_callback_metadata_change_msg { int type; remote_string nsuri; }; +struct remote_domain_event_memory_failure_msg { + int callbackID; + remote_nonnull_domain dom; + int recipient; + int action; + u_int flags; +}; struct remote_connect_secret_event_register_any_args { int eventID; remote_secret secret; @@ -3558,4 +3565,5 @@ enum remote_procedure { REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420, REMOTE_PROC_DOMAIN_BACKUP_BEGIN = 421, REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422, + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423, };

On 10/13/20 8:31 PM, Daniel Henrique Barboza wrote:
This patch failed to compile in my env:
FAILED: tools/virsh.p/virsh-domain.c.o [....] -D_FUNCTION_DEF -MD -MQ tools/virsh.p/virsh-domain.c.o -MF tools/virsh.p/virsh-domain.c.o.d -o tools/virsh.p/virsh-domain.c.o -c ../tools/virsh-domain.c In file included from /usr/lib64/glib-2.0/include/glibconfig.h:9, from /usr/include/glib-2.0/glib/gtypes.h:32, from /usr/include/glib-2.0/glib/galloca.h:32, from /usr/include/glib-2.0/glib.h:30, from ../src/util/glibcompat.h:21, from ../src/internal.h:30, from ../tools/virsh.h:25, from ../tools/virsh-domain.h:23, from ../tools/virsh-domain.c:22: /usr/include/glib-2.0/glib/gmacros.h:745:53: error: size of array ‘_GStaticAssertCompileTimeAssertion_185’ is negative 745 | #define G_STATIC_ASSERT(expr) typedef char G_PASTE (_GStaticAssertCompileTimeAssertion_, __COUNTER__)[(expr) ? 1 : -1] G_GNUC_UNUSED | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/glib-2.0/glib/gmacros.h:735:47: note: in definition of macro ‘G_PASTE_ARGS’ 735 | #define G_PASTE_ARGS(identifier1,identifier2) identifier1 ## identifier2 | ^~~~~~~~~~~ /usr/include/glib-2.0/glib/gmacros.h:745:44: note: in expansion of macro ‘G_PASTE’ 745 | #define G_STATIC_ASSERT(expr) typedef char G_PASTE (_GStaticAssertCompileTimeAssertion_, __COUNTER__)[(expr) ? 1 : -1] G_GNUC_UNUSED | ^~~~~~~ ../tools/virsh-domain.c:13643:1: note: in expansion of macro ‘G_STATIC_ASSERT’ 13643 | G_STATIC_ASSERT(VIR_DOMAIN_EVENT_ID_LAST == G_N_ELEMENTS(virshDomainEventCallbacks)); | ^~~~~~~~~~~~~~~ [505/984] Compiling C object src/virtqemud.p/remote_remote_daemon_dispatch.c.o ninja: build stopped: subcommand failed. $
I didn't verify whether the following patches fix it.
Thanks,
DHB
I described it in '[PATCH v2 4/4] virsh: implement memory failure event'. Notice: The full patch set includes 4 patches: virsh: implement memory failure event (current patch) qemu: monitor: handle memory failure event qemu: process: implement domainMemoryFailure API: introduce memory failure To avoid build/test errors, the 4 patches should be merged/removed together. As suggested by Peter, I separated the 'all in one' patch into 4 patches (described in the cover letter '[PATCH v2 0/4] support memory failure'). I forked a repo and pushed the 4 patches (https://gitlab.com/pacepi/libvirt/-/tree/memory-failure-v2), and CI worked fine.
On 10/12/20 9:31 AM, zhenwei pi wrote:
Introduce a memory failure event. Libvirt should monitor the domain's events and forward them to the upper layer. Based on the hardware memory corruption message, the cloud scheduler could migrate the domain to another, healthy physical server.
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- include/libvirt/libvirt-domain.h | 82 +++++++++++++++++++++++++++++++++++++ src/conf/domain_event.c | 80 ++++++++++++++++++++++++++++++++++++ src/conf/domain_event.h | 12 ++++++ src/libvirt_private.syms | 2 + src/remote/remote_daemon_dispatch.c | 32 +++++++++++++++ src/remote/remote_driver.c | 32 +++++++++++++++ src/remote/remote_protocol.x | 16 +++++++- src/remote_protocol-structs | 8 ++++ 8 files changed, 263 insertions(+), 1 deletion(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 77f9116675..5138843a56 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -3196,6 +3196,64 @@ typedef enum { } virDomainEventCrashedDetailType; /** + * virDomainMemoryFailureRecipientType: + * + * Recipient of a memory failure event. + */ +typedef enum { + /* memory failure at hypersivor memory address space */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR = 0, + + /* memory failure at guest memory address space */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST = 1, + +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST +# endif +} virDomainMemoryFailureRecipientType; + + +/** + * virDomainMemoryFailureActionType: + * + * Action of a memory failure event. + */ +typedef enum { + /* the memory failure could be ignored. This will only be the case for + * action-optional failures. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE = 0, + + /* memory failure occurred in guest memory, the guest enabled MCE handling + * mechanism, and hypervisor could inject the MCE into the guest + * successfully. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT = 1, + + /* the failure is unrecoverable. This occurs for action-required failures + * if the recipient is the hypervisor; hypervisor will exit. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL = 2, + + /* the failure is unrecoverable but confined to the guest. This occurs if + * the recipient is a guest which is not ready to handle memory failures. */ + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET = 3, + +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST +# endif +} virDomainMemoryFailureActionType; + + +typedef enum { + /* whether a memory failure event is action-required or action-optional + * (e.g. a failure during memory scrub). 
*/ + VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED = (1 << 0), + + /* whether the failure occurred while the previous failure was still in + * progress. */ + VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE = (1 << 1), +} virDomainMemoryFailureFlags; + + +/** * virConnectDomainEventCallback: * @conn: virConnect connection * @dom: The domain on which the event occurred @@ -4565,6 +4623,29 @@ typedef void (*virConnectDomainEventBlockThresholdCallback)(virConnectPtr conn, void *opaque); /** + * virConnectDomainEventMemoryFailureCallback: + * @conn: connection object + * @dom: domain on which the event occurred + * @recipient: the recipient of hardware memory failure + * @action: the action of hardware memory failure + * @flags: the flags of hardware memory failure + * @opaque: application specified data + * + * The callback occurs when the hypervisor handles the hardware memory + * corrupted event. + * + * The callback signature to use when registering for an event of type + * VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE with virConnectDomainEventRegisterAny() + */ +typedef void (*virConnectDomainEventMemoryFailureCallback)(virConnectPtr conn, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque); + + +/** * VIR_DOMAIN_EVENT_CALLBACK: * * Used to cast the event specific callback into the generic one @@ -4606,6 +4687,7 @@ typedef enum { VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED = 22, /* virConnectDomainEventDeviceRemovalFailedCallback */ VIR_DOMAIN_EVENT_ID_METADATA_CHANGE = 23, /* virConnectDomainEventMetadataChangeCallback */ VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD = 24, /* virConnectDomainEventBlockThresholdCallback */ + VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE = 25, /* virConnectDomainEventMemoryFailureCallback */ # ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_EVENT_ID_LAST diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c index a8bd9f1595..4a6051a6ab 100644 --- a/src/conf/domain_event.c +++ 
b/src/conf/domain_event.c @@ -57,6 +57,7 @@ static virClassPtr virDomainEventJobCompletedClass; static virClassPtr virDomainEventDeviceRemovalFailedClass; static virClassPtr virDomainEventMetadataChangeClass; static virClassPtr virDomainEventBlockThresholdClass; +static virClassPtr virDomainEventMemoryFailureClass; static void virDomainEventDispose(void *obj); static void virDomainEventLifecycleDispose(void *obj); @@ -79,6 +80,7 @@ static void virDomainEventJobCompletedDispose(void *obj); static void virDomainEventDeviceRemovalFailedDispose(void *obj); static void virDomainEventMetadataChangeDispose(void *obj); static void virDomainEventBlockThresholdDispose(void *obj); +static void virDomainEventMemoryFailureDispose(void *obj); static void virDomainEventDispatchDefaultFunc(virConnectPtr conn, @@ -287,6 +289,15 @@ struct _virDomainEventBlockThreshold { typedef struct _virDomainEventBlockThreshold virDomainEventBlockThreshold; typedef virDomainEventBlockThreshold *virDomainEventBlockThresholdPtr; +struct _virDomainEventMemoryFailure { + virDomainEvent parent; + + virDomainMemoryFailureRecipientType recipient; + virDomainMemoryFailureActionType action; + unsigned int flags; +}; +typedef struct _virDomainEventMemoryFailure virDomainEventMemoryFailure; +typedef virDomainEventMemoryFailure *virDomainEventMemoryFailurePtr; static int virDomainEventsOnceInit(void) @@ -333,6 +344,8 @@ virDomainEventsOnceInit(void) return -1; if (!VIR_CLASS_NEW(virDomainEventBlockThreshold, virDomainEventClass)) return -1; + if (!VIR_CLASS_NEW(virDomainEventMemoryFailure, virDomainEventClass)) + return -1; return 0; } @@ -542,6 +555,14 @@ virDomainEventBlockThresholdDispose(void *obj) } +static void +virDomainEventMemoryFailureDispose(void *obj) +{ + virDomainEventMemoryFailurePtr event = obj; + VIR_DEBUG("obj=%p", event); +} + + static void * virDomainEventNew(virClassPtr klass, int eventID, @@ -1619,6 +1640,52 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom, } +static 
virObjectEventPtr +virDomainEventMemoryFailureNew(int id, + const char *name, + unsigned char *uuid, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + virDomainEventMemoryFailurePtr ev; + + if (virDomainEventsInitialize() < 0) + return NULL; + + if (!(ev = virDomainEventNew(virDomainEventMemoryFailureClass, + VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE, + id, name, uuid))) + return NULL; + + ev->recipient = recipient; + ev->action = action; + ev->flags = flags; + + return (virObjectEventPtr)ev; +} + +virObjectEventPtr +virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + return virDomainEventMemoryFailureNew(obj->def->id, obj->def->name, + obj->def->uuid, recipient, action, + flags); +} + +virObjectEventPtr +virDomainEventMemoryFailureNewFromDom(virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags) +{ + return virDomainEventMemoryFailureNew(dom->id, dom->name, dom->uuid, + recipient, action, flags); +} + static void virDomainEventDispatchDefaultFunc(virConnectPtr conn, virObjectEventPtr event, @@ -1902,6 +1969,19 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
cbopaque); goto cleanup; } + case VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE: + { + virDomainEventMemoryFailurePtr memoryFailureEvent; + + memoryFailureEvent = (virDomainEventMemoryFailurePtr)event; + ((virConnectDomainEventMemoryFailureCallback)cb)(conn, dom, + memoryFailureEvent->recipient, + memoryFailureEvent->action, + memoryFailureEvent->flags, + cbopaque); + goto cleanup; + } + case VIR_DOMAIN_EVENT_ID_LAST: break; } diff --git a/src/conf/domain_event.h b/src/conf/domain_event.h index d1cfb81d62..1d001e164e 100644 --- a/src/conf/domain_event.h +++ b/src/conf/domain_event.h @@ -255,6 +255,18 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom, unsigned long long threshold, unsigned long long excess); +virObjectEventPtr +virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags); + +virObjectEventPtr +virDomainEventMemoryFailureNewFromDom(virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags); + int virDomainEventStateRegister(virConnectPtr conn, virObjectEventStatePtr state, diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 152083d220..927de5001a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -704,6 +704,8 @@ virDomainEventLifecycleNew; virDomainEventLifecycleNewFromDef; virDomainEventLifecycleNewFromDom; virDomainEventLifecycleNewFromObj; +virDomainEventMemoryFailureNewFromDom; +virDomainEventMemoryFailureNewFromObj; virDomainEventMetadataChangeNewFromDom; virDomainEventMetadataChangeNewFromObj; virDomainEventMigrationIterationNewFromDom; diff --git a/src/remote/remote_daemon_dispatch.c b/src/remote/remote_daemon_dispatch.c index 32ebcd8f36..078467f8da 100644 --- a/src/remote/remote_daemon_dispatch.c +++ b/src/remote/remote_daemon_dispatch.c @@ -1302,6 +1302,37 @@ remoteRelayDomainEventBlockThreshold(virConnectPtr conn, } 
+static int +remoteRelayDomainEventMemoryFailure(virConnectPtr conn, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque) +{ + daemonClientEventCallbackPtr callback = opaque; + remote_domain_event_memory_failure_msg data; + + if (callback->callbackID < 0 || + !remoteRelayDomainEventCheckACL(callback->client, conn, dom)) + return -1; + + /* build return data */ + memset(&data, 0, sizeof(data)); + data.callbackID = callback->callbackID; + data.recipient = recipient; + data.action = action; + data.flags = flags; + make_nonnull_domain(&data.dom, dom); + + remoteDispatchObjectEventSend(callback->client, remoteProgram, + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE, + (xdrproc_t)xdr_remote_domain_event_memory_failure_msg, &data); + + return 0; +} + + static virConnectDomainEventGenericCallback domainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventLifecycle), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventReboot), @@ -1328,6 +1359,7 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceRemovalFailed), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMetadataChange), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventBlockThreshold), + VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMemoryFailure), }; G_STATIC_ASSERT(G_N_ELEMENTS(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST); diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index d318224605..9cd2fd36ae 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -405,6 +405,11 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog, void *evdata, void *opaque); static void +remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog, + virNetClientPtr client, + void *evdata, void *opaque); + +static void remoteConnectNotifyEventConnectionClosed(virNetClientProgramPtr prog G_GNUC_UNUSED, virNetClientPtr client G_GNUC_UNUSED, void *evdata, void *opaque); @@ -615,6 +620,10 @@ static virNetClientProgramEvent remoteEvents[] = { remoteDomainBuildEventBlockThreshold, sizeof(remote_domain_event_block_threshold_msg), (xdrproc_t)xdr_remote_domain_event_block_threshold_msg }, + { REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE, + remoteDomainBuildEventMemoryFailure, + sizeof(remote_domain_event_memory_failure_msg), + (xdrproc_t)xdr_remote_domain_event_memory_failure_msg }, }; static void @@ -5440,6 +5449,29 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog G_GNUC_UNUSED, } +static void +remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog G_GNUC_UNUSED, + virNetClientPtr client G_GNUC_UNUSED, + void *evdata, void *opaque) +{ + virConnectPtr conn = opaque; + remote_domain_event_memory_failure_msg *msg = evdata; + struct private_data *priv = conn->privateData; + virDomainPtr dom; + virObjectEventPtr event = NULL; + + if (!(dom = get_nonnull_domain(conn, msg->dom))) + return; + + event = virDomainEventMemoryFailureNewFromDom(dom, msg->recipient, + msg->action, msg->flags); + + 
virObjectUnref(dom); + + virObjectEventStateQueueRemote(priv->eventState, event, msg->callbackID); +} + + static int remoteStreamSend(virStreamPtr st, const char *data, diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x index f4d6147676..5e5e781e76 100644 --- a/src/remote/remote_protocol.x +++ b/src/remote/remote_protocol.x @@ -3469,6 +3469,14 @@ struct remote_domain_event_callback_metadata_change_msg { remote_string nsuri; }; +struct remote_domain_event_memory_failure_msg { + int callbackID; + remote_nonnull_domain dom; + int recipient; + int action; + unsigned int flags; +}; + struct remote_connect_secret_event_register_any_args { int eventID; remote_secret secret; @@ -6668,5 +6676,11 @@ enum remote_procedure { * @priority: high * @acl: domain:read */ - REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422 + REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422, + + /** + * @generate: both + * @acl: none + */ + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423 }; diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs index bae0f0b545..c2ae411885 100644 --- a/src/remote_protocol-structs +++ b/src/remote_protocol-structs @@ -2862,6 +2862,13 @@ struct remote_domain_event_callback_metadata_change_msg { int type; remote_string nsuri; }; +struct remote_domain_event_memory_failure_msg { + int callbackID; + remote_nonnull_domain dom; + int recipient; + int action; + u_int flags; +}; struct remote_connect_secret_event_register_any_args { int eventID; remote_secret secret; @@ -3558,4 +3565,5 @@ enum remote_procedure { REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420, REMOTE_PROC_DOMAIN_BACKUP_BEGIN = 421, REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422, + REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423, };
-- zhenwei pi

On Wed, Oct 14, 2020 at 10:44:53 +0800, zhenwei pi wrote:
On 10/13/20 8:31 PM, Daniel Henrique Barboza wrote:
This patch failed to compile in my env:
FAILED: tools/virsh.p/virsh-domain.c.o [....] -D_FUNCTION_DEF -MD -MQ tools/virsh.p/virsh-domain.c.o -MF tools/virsh.p/virsh-domain.c.o.d -o tools/virsh.p/virsh-domain.c.o -c ../tools/virsh-domain.c In file included from /usr/lib64/glib-2.0/include/glibconfig.h:9, from /usr/include/glib-2.0/glib/gtypes.h:32, from /usr/include/glib-2.0/glib/galloca.h:32, from /usr/include/glib-2.0/glib.h:30, from ../src/util/glibcompat.h:21, from ../src/internal.h:30, from ../tools/virsh.h:25, from ../tools/virsh-domain.h:23, from ../tools/virsh-domain.c:22: /usr/include/glib-2.0/glib/gmacros.h:745:53: error: size of array ‘_GStaticAssertCompileTimeAssertion_185’ is negative 745 | #define G_STATIC_ASSERT(expr) typedef char G_PASTE (_GStaticAssertCompileTimeAssertion_, __COUNTER__)[(expr) ? 1 : -1] G_GNUC_UNUSED | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/glib-2.0/glib/gmacros.h:735:47: note: in definition of macro ‘G_PASTE_ARGS’ 735 | #define G_PASTE_ARGS(identifier1,identifier2) identifier1 ## identifier2 | ^~~~~~~~~~~ /usr/include/glib-2.0/glib/gmacros.h:745:44: note: in expansion of macro ‘G_PASTE’ 745 | #define G_STATIC_ASSERT(expr) typedef char G_PASTE (_GStaticAssertCompileTimeAssertion_, __COUNTER__)[(expr) ? 1 : -1] G_GNUC_UNUSED | ^~~~~~~ ../tools/virsh-domain.c:13643:1: note: in expansion of macro ‘G_STATIC_ASSERT’ 13643 | G_STATIC_ASSERT(VIR_DOMAIN_EVENT_ID_LAST == G_N_ELEMENTS(virshDomainEventCallbacks)); | ^~~~~~~~~~~~~~~ [505/984] Compiling C object src/virtqemud.p/remote_remote_daemon_dispatch.c.o ninja: build stopped: subcommand failed. $
I didn't verify whether the following patches fix it.
Thanks,
DHB
I described it in '[PATCH v2 4/4] virsh: implement memory failure event'
Notice: The full patch set includes 4 patches: virsh: implement memory failure event (current patch) qemu: monitor: handle memory failure event qemu: process: implement domainMemoryFailure API: introduce memory failure
To avoid build/test errors, the 4 patches should be merged/removed together.
No, they just need to be moved into an appropriate order so that they can be built. There's no point splitting them just to merge them later.
As suggested by Peter, I separated the 'all in one' patch into 4 patches (described in the cover letter '[PATCH v2 0/4] support memory failure'). I forked a repo and pushed the 4 patches (https://gitlab.com/pacepi/libvirt/-/tree/memory-failure-v2), and CI worked fine.
Our contribution guidelines require that the tree builds successfully after every single patch: https://libvirt.org/hacking.html Section "Preparing patches": "If you're going to submit multiple patches, the automated tests must pass *after each patch*, not just after the last one."

On 10/14/20 4:44 AM, zhenwei pi wrote:
I described it in '[PATCH v2 4/4] virsh: implement memory failure event'
Notice: The full patch set includes 4 patches: virsh: implement memory failure event (current patch) qemu: monitor: handle memory failure event qemu: process: implement domainMemoryFailure API: introduce memory failure
To avoid build/test errors, the 4 patches should be merged/removed together.
As suggested by Peter, I separated the 'all in one' patch into 4 patches (described in the cover letter '[PATCH v2 0/4] support memory failure'). I forked a repo and pushed the 4 patches (https://gitlab.com/pacepi/libvirt/-/tree/memory-failure-v2), and CI worked fine.
To add to Peter's reply: the reason we require each patch to build on its own is backporting. You are not necessarily doing it in your series, but if, for instance, you moved some code, then: 1) it's a standalone change that should be in a separate patch 2) if a distribution wants to backport the patch (e.g. because it is a prerequisite for some other patch), then the code base should build without having to backport irrelevant patches. And to some extent your patches can be viewed as backportable. I can imagine that somebody writes the code for another driver (say libxl) and a distro might want to backport it. It will need to backport this patch, which adds the RPC and client-facing APIs. But in its current state it can't do so because it won't compile. Looking forward to v3. Michal

Implement domainMemoryFailure callback function to handle a domain memory failure event. Convert QEMU specified parameters to a common libvirt domain event, and send to uplayer. Notice that, this commit could compiling with the following patch (introduce QEMU specified memory failure parameters). Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- src/qemu/qemu_process.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 6b5de29fdb..7c24677ecb 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1878,6 +1878,64 @@ qemuProcessHandleGuestCrashloaded(qemuMonitorPtr mon G_GNUC_UNUSED, } +static int +qemuProcessHandleMemoryFailure(qemuMonitorPtr mon G_GNUC_UNUSED, + virDomainObjPtr vm, + qemuMonitorEventMemoryFailurePtr mfp, + void *opaque) +{ + virQEMUDriverPtr driver = opaque; + virObjectEventPtr event = NULL; + virDomainMemoryFailureRecipientType recipient; + virDomainMemoryFailureActionType action; + unsigned int flags = 0; + + switch (mfp->recipient) { + case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR: + recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR; + break; + case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST: + recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST; + break; + case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST: + default: + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("requested unknown memory failure recipient")); + return -1; + } + + switch (mfp->action) { + case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE: + action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE; + break; + case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT: + action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT; + break; + case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL: + action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL; + break; + case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET: + action = 
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET; + break; + case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST: + default: + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("requested unknown memory failure action")); + return -1; + } + + if (mfp->action_required) + flags |= VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED; + if (mfp->recursive) + flags |= VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE; + + event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action, flags); + virObjectEventStateQueue(driver->domainEventState, event); + + return 0; +} + + static qemuMonitorCallbacks monitorCallbacks = { .eofNotify = qemuProcessHandleMonitorEOF, .errorNotify = qemuProcessHandleMonitorError, @@ -1910,6 +1968,7 @@ static qemuMonitorCallbacks monitorCallbacks = { .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged, .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged, .domainGuestCrashloaded = qemuProcessHandleGuestCrashloaded, + .domainMemoryFailure = qemuProcessHandleMemoryFailure, }; static void -- 2.11.0

Handle memory failure by json string from QEMU, then process by domainMemoryFailure callback function. Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- src/qemu/qemu_monitor.c | 21 ++++++++++++++++++- src/qemu/qemu_monitor.h | 39 +++++++++++++++++++++++++++++++++++ src/qemu/qemu_monitor_json.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 8c991fefbb..189b789bb8 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void) VIR_ONCE_GLOBAL_INIT(qemuMonitor); - VIR_ENUM_IMPL(qemuMonitorMigrationStatus, QEMU_MONITOR_MIGRATION_STATUS_LAST, "inactive", "setup", @@ -197,6 +196,14 @@ VIR_ENUM_IMPL(qemuMonitorDumpStatus, "none", "active", "completed", "failed", ); +VIR_ENUM_IMPL(qemuMonitorMemoryFailureRecipient, + QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST, + "hypervisor", "guest"); + +VIR_ENUM_IMPL(qemuMonitorMemoryFailureAction, + QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST, + "ignore", "inject", + "fatal", "reset"); #if DEBUG_RAW_IO static char * @@ -1428,6 +1435,18 @@ qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon) int +qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon, + qemuMonitorEventMemoryFailurePtr mfp) +{ + int ret = -1; + + QEMU_MONITOR_CALLBACK(mon, ret, domainMemoryFailure, mon->vm, mfp); + + return ret; +} + + +int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon, int status) { diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index a744c8975b..17ba006a2f 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -340,6 +340,40 @@ typedef int (*qemuMonitorDomainGuestCrashloadedCallback)(qemuMonitorPtr mon, virDomainObjPtr vm, void *opaque); +typedef enum { + QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR, + QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST, + + QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST +} qemuMonitorMemoryFailureRecipient; + 
+VIR_ENUM_DECL(qemuMonitorMemoryFailureRecipient); + +typedef enum { + QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE, + QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT, + QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL, + QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET, + + QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST +} qemuMonitorMemoryFailureAction; + +VIR_ENUM_DECL(qemuMonitorMemoryFailureAction); + +typedef struct _qemuMonitorEventMemoryFailure qemuMonitorEventMemoryFailure; +typedef qemuMonitorEventMemoryFailure *qemuMonitorEventMemoryFailurePtr; +struct _qemuMonitorEventMemoryFailure { + qemuMonitorMemoryFailureRecipient recipient; + qemuMonitorMemoryFailureAction action; + bool action_required; + bool recursive; +}; + +typedef int (*qemuMonitorDomainMemoryFailureCallback)(qemuMonitorPtr mon, + virDomainObjPtr vm, + qemuMonitorEventMemoryFailurePtr mfp, + void *opaque); + typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks; typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr; struct _qemuMonitorCallbacks { @@ -376,6 +410,7 @@ struct _qemuMonitorCallbacks { qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged; qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged; qemuMonitorDomainGuestCrashloadedCallback domainGuestCrashloaded; + qemuMonitorDomainMemoryFailureCallback domainMemoryFailure; }; qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm, @@ -475,6 +510,10 @@ int qemuMonitorEmitSerialChange(qemuMonitorPtr mon, const char *devAlias, bool connected); int qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon); + +int qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon, + qemuMonitorEventMemoryFailurePtr mfp); + int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon, int status); int qemuMonitorEmitMigrationPass(qemuMonitorPtr mon, diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 26ac499fc5..aa256727d6 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -112,6 +112,7 @@ static void 
qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon, virJSONValue static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr data); static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data); static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data); +static void qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, virJSONValuePtr data); typedef struct { const char *type; @@ -132,6 +133,7 @@ static qemuEventHandler eventHandlers[] = { { "GUEST_CRASHLOADED", qemuMonitorJSONHandleGuestCrashloaded, }, { "GUEST_PANICKED", qemuMonitorJSONHandleGuestPanic, }, { "JOB_STATUS_CHANGE", qemuMonitorJSONHandleJobStatusChange, }, + { "MEMORY_FAILURE", qemuMonitorJSONHandleMemoryFailure, }, { "MIGRATION", qemuMonitorJSONHandleMigrationStatus, }, { "MIGRATION_PASS", qemuMonitorJSONHandleMigrationPass, }, { "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, }, @@ -1336,6 +1338,53 @@ qemuMonitorJSONHandleSpiceMigrated(qemuMonitorPtr mon, static void +qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, + virJSONValuePtr data) +{ + virJSONValuePtr flagsjson = virJSONValueObjectGetObject(data, "flags"); + const char *str; + int recipient; + int action; + bool ar = false; + bool recursive = false; + qemuMonitorEventMemoryFailure mf = {0}; + + if (!(str = virJSONValueObjectGetString(data, "recipient"))) { + VIR_WARN("missing recipient in memory failure event"); + return; + } + + recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str); + if (recipient == -1) { + VIR_WARN("unknown recipient '%s' in memory_failure event", str); + return; + } + + if (!(str = virJSONValueObjectGetString(data, "action"))) { + VIR_WARN("missing action in memory failure event"); + return; + } + + action = qemuMonitorMemoryFailureActionTypeFromString(str); + if (action == -1) { + VIR_WARN("unknown action '%s' in memory_failure event", str); + return; + } + + if (flagsjson) { + 
virJSONValueObjectGetBoolean(flagsjson, "action-required", &ar); + virJSONValueObjectGetBoolean(flagsjson, "recursive", &recursive); + } + + mf.recipient = recipient; + mf.action = action; + mf.action_required = ar; + mf.recursive = recursive; + qemuMonitorEmitMemoryFailure(mon, &mf); +} + + +static void qemuMonitorJSONHandleMigrationStatus(qemuMonitorPtr mon, virJSONValuePtr data) { -- 2.11.0

Implement memory failure event for virsh command and test. Note: The full patch set includes 4 patches: virsh: implement memory failure event (current patch) qemu: monitor: handle memory failure event qemu: process: implement domainMemoryFailure API: introduce memory failure To avoid build/test errors, the 4 patches should be merged/removed together. All the patches were tested in a slightly more complex environment (nested KVM): 1. Install the newly built libvirt in L1 and start an L2 VM. Run this command in L1: ~# virsh event l2 --event memory-failure 2. Run this command in L0 to inject an MCE into L1: ~# virsh qemu-monitor-command l1 --hmp mce 0 9 0xbd000000000000c0 0xd 0x62000000 0x8c Test result in L1 (recipient hypervisor case): event 'memory-failure' for domain l2: recipient: hypervisor action: ignore flags: action required: 0 recursive: 0 Test result in L1 (recipient guest case): event 'memory-failure' for domain l2: recipient: guest action: inject flags: action required: 0 recursive: 0 Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- examples/c/misc/event-test.c | 16 ++++++++++++++++ tools/virsh-domain.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/examples/c/misc/event-test.c b/examples/c/misc/event-test.c index 52caa8ffa8..1651efe019 100644 --- a/examples/c/misc/event-test.c +++ b/examples/c/misc/event-test.c @@ -964,6 +964,21 @@ myDomainEventBlockThresholdCallback(virConnectPtr conn G_GNUC_UNUSED, static int +myDomainEventMemoryFailureCallback(virConnectPtr conn G_GNUC_UNUSED, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque G_GNUC_UNUSED) +{ + printf("%s EVENT: Domain %s(%d) memory failure: recipient '%d', " + "aciont '%d', flags '%d'", __func__, virDomainGetName(dom), + virDomainGetID(dom), recipient, action, flags); + return 0; +} + + +static int myDomainEventMigrationIterationCallback(virConnectPtr conn G_GNUC_UNUSED, 
virDomainPtr dom, int iteration, @@ -1093,6 +1108,7 @@ struct domainEventData domainEvents[] = { DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED, myDomainEventDeviceRemovalFailedCallback), DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_METADATA_CHANGE, myDomainEventMetadataChangeCallback), DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD, myDomainEventBlockThresholdCallback), + DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE, myDomainEventMemoryFailureCallback), }; struct storagePoolEventData { diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index 8f11393197..2bfb33e528 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -13590,6 +13590,44 @@ virshEventBlockThresholdPrint(virConnectPtr conn G_GNUC_UNUSED, } +VIR_ENUM_DECL(virshEventMemoryFailureRecipientType); +VIR_ENUM_IMPL(virshEventMemoryFailureRecipientType, + VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST, + N_("hypervisor"), + N_("guest")); + +VIR_ENUM_DECL(virshEventMemoryFailureActionType); +VIR_ENUM_IMPL(virshEventMemoryFailureActionType, + VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST, + N_("ignore"), + N_("inject"), + N_("fatal"), + N_("reset")); + +static void +virshEventMemoryFailurePrint(virConnectPtr conn G_GNUC_UNUSED, + virDomainPtr dom, + virDomainMemoryFailureRecipientType recipient, + virDomainMemoryFailureActionType action, + unsigned int flags, + void *opaque) +{ + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; + + virBufferAsprintf(&buf, _("event 'memory-failure' for domain %s:\n" + "recipient: %s\naction: %s\n"), + virDomainGetName(dom), + UNKNOWNSTR(virshEventMemoryFailureRecipientTypeTypeToString(recipient)), + UNKNOWNSTR(virshEventMemoryFailureActionTypeTypeToString(action))); + virBufferAsprintf(&buf, _("flags:\n" + "\taction required: %d\n\trecursive: %d\n"), + !!(flags & VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED), + !!(flags & VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE)); + + virshEventPrint(opaque, &buf); +} + + virshDomainEventCallback virshDomainEventCallbacks[] = { { 
"lifecycle", VIR_DOMAIN_EVENT_CALLBACK(virshEventLifecyclePrint), }, @@ -13639,6 +13677,8 @@ virshDomainEventCallback virshDomainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(virshEventMetadataChangePrint), }, { "block-threshold", VIR_DOMAIN_EVENT_CALLBACK(virshEventBlockThresholdPrint), }, + { "memory-failure", + VIR_DOMAIN_EVENT_CALLBACK(virshEventMemoryFailurePrint), }, }; G_STATIC_ASSERT(VIR_DOMAIN_EVENT_ID_LAST == G_N_ELEMENTS(virshDomainEventCallbacks)); -- 2.11.0
participants (4)
-
Daniel Henrique Barboza
-
Michal Privoznik
-
Peter Krempa
-
zhenwei pi