[libvirt] [PATCH v4 0/5] libvirt supports Guest Panicked

Changes: v3-v4: 1. Supports the dumpcore options of the oncrash element in the XML. 2. Move the previous code to processWatchdogEvent(). v2-v3: 1. split into 3 patches v1-v2: 1. fix the incorrect domain state: paused -> crashed, when crash the guest while libvirt isn't running, then restart libvirtd. Chen Fan (5): libvirt: Define domain crash event types qemu: Supports guest panicked virsh: supports guest panicked libvirt: Define crash dumpcore events in watchdogAction. qemu: Implement the oncrash events in processWatchdogEvent. examples/domain-events/events-c/event-test.c | 10 ++ include/libvirt/libvirt.h.in | 16 +++ src/conf/domain_conf.c | 16 ++- src/conf/domain_conf.h | 4 + src/qemu/qemu_driver.c | 197 +++++++++++++++++++++------ src/qemu/qemu_monitor.c | 14 +- src/qemu/qemu_monitor.h | 5 + src/qemu/qemu_monitor_json.c | 7 + src/qemu/qemu_process.c | 127 ++++++++++++++--- src/qemu/qemu_process.h | 2 + tools/virsh-domain-monitor.c | 8 ++ 11 files changed, 338 insertions(+), 68 deletions(-) -- 1.8.1.4

This patch introduces domain crashed types and crashed reasons which will be used while guest panicked. --- examples/domain-events/events-c/event-test.c | 10 ++++++++++ include/libvirt/libvirt.h.in | 16 ++++++++++++++++ src/conf/domain_conf.c | 12 ++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/examples/domain-events/events-c/event-test.c b/examples/domain-events/events-c/event-test.c index eeff50f..1b425fb 100644 --- a/examples/domain-events/events-c/event-test.c +++ b/examples/domain-events/events-c/event-test.c @@ -93,6 +93,9 @@ const char *eventToString(int event) { case VIR_DOMAIN_EVENT_PMSUSPENDED: ret = "PMSuspended"; break; + case VIR_DOMAIN_EVENT_CRASHED: + ret = "Crashed"; + break; } return ret; } @@ -209,6 +212,13 @@ static const char *eventDetailToString(int event, int detail) { break; } break; + case VIR_DOMAIN_EVENT_CRASHED: + switch ((virDomainEventCrashedDetailType) detail) { + case VIR_DOMAIN_EVENT_CRASHED_PANICKED: + ret = "Panicked"; + break; + } + break; } return ret; } diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 1804c93..56c6c5c 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -155,6 +155,7 @@ typedef enum { VIR_DOMAIN_RUNNING_SAVE_CANCELED = 7, /* returned from failed save process */ VIR_DOMAIN_RUNNING_WAKEUP = 8, /* returned from pmsuspended due to wakeup event */ + VIR_DOMAIN_RUNNING_CRASHED = 9, /* resumed from crashed */ #ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_RUNNING_LAST @@ -180,6 +181,7 @@ typedef enum { VIR_DOMAIN_PAUSED_FROM_SNAPSHOT = 7, /* paused after restoring from snapshot */ VIR_DOMAIN_PAUSED_SHUTTING_DOWN = 8, /* paused during shutdown process */ VIR_DOMAIN_PAUSED_SNAPSHOT = 9, /* paused while creating a snapshot */ + VIR_DOMAIN_PAUSED_GUEST_PANICKED = 10, /* paused due to a guest panicked event */ #ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_PAUSED_LAST @@ -189,6 +191,7 @@ typedef enum { typedef enum { VIR_DOMAIN_SHUTDOWN_UNKNOWN = 0, /* the reason is unknown */ VIR_DOMAIN_SHUTDOWN_USER = 1, /* shutting down on user request */ + VIR_DOMAIN_SHUTDOWN_CRASHED = 2, /* domain crashed */ #ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_SHUTDOWN_LAST @@ -212,6 +215,7 @@ typedef enum { typedef enum { VIR_DOMAIN_CRASHED_UNKNOWN = 0, /* crashed for unknown reason */ + VIR_DOMAIN_CRASHED_PANICKED = 1, /* domain panicked */ #ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_CRASHED_LAST @@ -3319,6 +3323,7 @@ typedef enum { VIR_DOMAIN_EVENT_STOPPED = 5, VIR_DOMAIN_EVENT_SHUTDOWN = 6, VIR_DOMAIN_EVENT_PMSUSPENDED = 7, + VIR_DOMAIN_EVENT_CRASHED = 8, #ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_EVENT_LAST @@ -3450,6 +3455,17 @@ typedef enum { #endif } virDomainEventPMSuspendedDetailType; +/* + * Details about the 'crashed' lifecycle event + */ +typedef enum { + VIR_DOMAIN_EVENT_CRASHED_PANICKED = 0, /* Guest was panicked */ + +#ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_EVENT_CRASHED_LAST +#endif +} virDomainEventCrashedDetailType; + /** * virConnectDomainEventCallback: * @conn: virConnect connection diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a9656af..f577fd4 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -642,7 +642,8 @@ VIR_ENUM_IMPL(virDomainRunningReason, VIR_DOMAIN_RUNNING_LAST, "unpaused", "migration canceled", "save canceled", - "wakeup") + "wakeup", + "from crashed") VIR_ENUM_IMPL(virDomainBlockedReason, VIR_DOMAIN_BLOCKED_LAST, "unknown") @@ -657,11 +658,13 @@ VIR_ENUM_IMPL(virDomainPausedReason, VIR_DOMAIN_PAUSED_LAST, "watchdog", "from snapshot", "shutdown", - "snapshot") + "snapshot", + "guest panicked") VIR_ENUM_IMPL(virDomainShutdownReason, VIR_DOMAIN_SHUTDOWN_LAST, "unknown", - "user") + "user", + "crashed") VIR_ENUM_IMPL(virDomainShutoffReason, VIR_DOMAIN_SHUTOFF_LAST, "unknown", @@ -674,7 +677,8 @@ VIR_ENUM_IMPL(virDomainShutoffReason, VIR_DOMAIN_SHUTOFF_LAST, "from snapshot") VIR_ENUM_IMPL(virDomainCrashedReason, VIR_DOMAIN_CRASHED_LAST, - "unknown") + "unknown", + "panicked") VIR_ENUM_IMPL(virDomainPMSuspendedReason, VIR_DOMAIN_PMSUSPENDED_LAST, "unknown") -- 1.8.1.4

On 06/02/2013 09:58 PM, Chen Fan wrote:
This patch introduces domain crashed types and crashed reasons which will be used while guest panicked. --- examples/domain-events/events-c/event-test.c | 10 ++++++++++ include/libvirt/libvirt.h.in | 16 ++++++++++++++++ src/conf/domain_conf.c | 12 ++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-)
+++ b/include/libvirt/libvirt.h.in @@ -155,6 +155,7 @@ typedef enum { VIR_DOMAIN_RUNNING_SAVE_CANCELED = 7, /* returned from failed save process */ VIR_DOMAIN_RUNNING_WAKEUP = 8, /* returned from pmsuspended due to wakeup event */ + VIR_DOMAIN_RUNNING_CRASHED = 9, /* resumed from crashed */
Indentation looks odd, but that's a trivial fix.
@@ -212,6 +215,7 @@ typedef enum {
typedef enum { VIR_DOMAIN_CRASHED_UNKNOWN = 0, /* crashed for unknown reason */ + VIR_DOMAIN_CRASHED_PANICKED = 1, /* domain panicked */
and again
+++ b/src/conf/domain_conf.c @@ -642,7 +642,8 @@ VIR_ENUM_IMPL(virDomainRunningReason, VIR_DOMAIN_RUNNING_LAST, "unpaused", "migration canceled", "save canceled", - "wakeup") + "wakeup", + "from crashed")
Might be better as merely "crashed"
VIR_ENUM_IMPL(virDomainBlockedReason, VIR_DOMAIN_BLOCKED_LAST, "unknown") @@ -657,11 +658,13 @@ VIR_ENUM_IMPL(virDomainPausedReason, VIR_DOMAIN_PAUSED_LAST, "watchdog", "from snapshot", "shutdown", - "snapshot") + "snapshot", + "guest panicked")
Might be better as merely "panicked" Overall, looks like a reasonable set of additions in isolation, but I'm still trying to figure out if support for xen panic detection already had existing events that we should be reusing, instead of adding new events just for qemu. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Applied and tested successfully on s390x platform (as expected by me). Eric mentioned already the few points that should be corrected (indentation of comments). Mit freundlichen Grüßen / Kind regards Daniel Hansel IBM Deutschland Research & Development GmbH Vorsitzende des Aufsichtsrats: Martina Koederitz Geschäftsführung: Dirk Wittkopp Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294 On 05.06.2013 05:03, Eric Blake wrote:
On 06/02/2013 09:58 PM, Chen Fan wrote:
This patch introduces domain crashed types and crashed reasons which will be used while guest panicked. --- examples/domain-events/events-c/event-test.c | 10 ++++++++++ include/libvirt/libvirt.h.in | 16 ++++++++++++++++ src/conf/domain_conf.c | 12 ++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-)
+++ b/include/libvirt/libvirt.h.in @@ -155,6 +155,7 @@ typedef enum { VIR_DOMAIN_RUNNING_SAVE_CANCELED = 7, /* returned from failed save process */ VIR_DOMAIN_RUNNING_WAKEUP = 8, /* returned from pmsuspended due to wakeup event */ + VIR_DOMAIN_RUNNING_CRASHED = 9, /* resumed from crashed */
Indentation looks odd, but that's a trivial fix.
@@ -212,6 +215,7 @@ typedef enum {
typedef enum { VIR_DOMAIN_CRASHED_UNKNOWN = 0, /* crashed for unknown reason */ + VIR_DOMAIN_CRASHED_PANICKED = 1, /* domain panicked */
and again
+++ b/src/conf/domain_conf.c @@ -642,7 +642,8 @@ VIR_ENUM_IMPL(virDomainRunningReason, VIR_DOMAIN_RUNNING_LAST, "unpaused", "migration canceled", "save canceled", - "wakeup") + "wakeup", + "from crashed")
Might be better as merely "crashed"
VIR_ENUM_IMPL(virDomainBlockedReason, VIR_DOMAIN_BLOCKED_LAST, "unknown") @@ -657,11 +658,13 @@ VIR_ENUM_IMPL(virDomainPausedReason, VIR_DOMAIN_PAUSED_LAST, "watchdog", "from snapshot", "shutdown", - "snapshot") + "snapshot", + "guest panicked")
Might be better as merely "panicked"
Overall, looks like a reasonable set of additions in isolation, but I'm still trying to figure out if support for xen panic detection already had existing events that we should be reusing, instead of adding new events just for qemu.
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On Wed, 2013-06-05 at 10:49 +0200, Daniel Hansel wrote:
Applied and tested successfully on s390x platform (as expected by me).
Eric mentioned already the few points that should be corrected (indentation of comments).
It's right.
Mit freundlichen Grüßen / Kind regards Daniel Hansel
IBM Deutschland Research & Development GmbH Vorsitzende des Aufsichtsrats: Martina Koederitz Geschäftsführung: Dirk Wittkopp Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294
On 05.06.2013 05:03, Eric Blake wrote:
On 06/02/2013 09:58 PM, Chen Fan wrote:
This patch introduces domain crashed types and crashed reasons which will be used while guest panicked. --- examples/domain-events/events-c/event-test.c | 10 ++++++++++ include/libvirt/libvirt.h.in | 16 ++++++++++++++++ src/conf/domain_conf.c | 12 ++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-)
+++ b/include/libvirt/libvirt.h.in @@ -155,6 +155,7 @@ typedef enum { VIR_DOMAIN_RUNNING_SAVE_CANCELED = 7, /* returned from failed save process */ VIR_DOMAIN_RUNNING_WAKEUP = 8, /* returned from pmsuspended due to wakeup event */ + VIR_DOMAIN_RUNNING_CRASHED = 9, /* resumed from crashed */
Indentation looks odd, but that's a trivial fix.
@@ -212,6 +215,7 @@ typedef enum {
typedef enum { VIR_DOMAIN_CRASHED_UNKNOWN = 0, /* crashed for unknown reason */ + VIR_DOMAIN_CRASHED_PANICKED = 1, /* domain panicked */
and again
+++ b/src/conf/domain_conf.c @@ -642,7 +642,8 @@ VIR_ENUM_IMPL(virDomainRunningReason, VIR_DOMAIN_RUNNING_LAST, "unpaused", "migration canceled", "save canceled", - "wakeup") + "wakeup", + "from crashed")
Might be better as merely "crashed"
VIR_ENUM_IMPL(virDomainBlockedReason, VIR_DOMAIN_BLOCKED_LAST, "unknown") @@ -657,11 +658,13 @@ VIR_ENUM_IMPL(virDomainPausedReason, VIR_DOMAIN_PAUSED_LAST, "watchdog", "from snapshot", "shutdown", - "snapshot") + "snapshot", + "guest panicked")
Might be better as merely "panicked"
Overall, looks like a reasonable set of additions in isolation, but I'm still trying to figure out if support for xen panic detection already had existing events that we should be reusing, instead of adding new events just for qemu.
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

Add monitor callback API domainGUESTPanicked, that implements the 'on_crash' behavior in the XML when domain crashed. --- src/qemu/qemu_monitor.c | 14 +++++- src/qemu/qemu_monitor.h | 5 +++ src/qemu/qemu_monitor_json.c | 7 +++ src/qemu/qemu_process.c | 103 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 127 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 4e35f79..e0cd62c 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -113,7 +113,7 @@ VIR_ENUM_IMPL(qemuMonitorVMStatus, QEMU_MONITOR_VM_STATUS_LAST, "debug", "inmigrate", "internal-error", "io-error", "paused", "postmigrate", "prelaunch", "finish-migrate", "restore-vm", - "running", "save-vm", "shutdown", "watchdog") + "running", "save-vm", "shutdown", "watchdog", "guest-panicked") typedef enum { QEMU_MONITOR_BLOCK_IO_STATUS_OK, @@ -1032,6 +1032,15 @@ int qemuMonitorEmitResume(qemuMonitorPtr mon) } +int qemuMonitorEmitGUESTPanicked(qemuMonitorPtr mon) +{ + int ret = -1; + VIR_DEBUG("mon=%p", mon); + QEMU_MONITOR_CALLBACK(mon, ret, domainGUESTPanicked, mon->vm); + return ret; +} + + int qemuMonitorEmitRTCChange(qemuMonitorPtr mon, long long offset) { int ret = -1; @@ -3185,6 +3194,9 @@ int qemuMonitorVMStatusToPausedReason(const char *status) case QEMU_MONITOR_VM_STATUS_WATCHDOG: return VIR_DOMAIN_PAUSED_WATCHDOG; + case QEMU_MONITOR_VM_STATUS_GUEST_PANICKED: + return VIR_DOMAIN_PAUSED_GUEST_PANICKED; + /* unreachable from this point on */ case QEMU_MONITOR_VM_STATUS_LAST: ; diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index a607712..543050c 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -140,6 +140,8 @@ struct _qemuMonitorCallbacks { unsigned long long actual); int (*domainPMSuspendDisk)(qemuMonitorPtr mon, virDomainObjPtr vm); + int (*domainGUESTPanicked)(qemuMonitorPtr mon, + virDomainObjPtr vm); }; char *qemuMonitorEscapeArg(const char *in); @@ -220,6 +222,8 @@ int qemuMonitorEmitBlockJob(qemuMonitorPtr mon, int qemuMonitorEmitBalloonChange(qemuMonitorPtr mon, unsigned long long actual); int qemuMonitorEmitPMSuspendDisk(qemuMonitorPtr mon); +int qemuMonitorEmitGUESTPanicked(qemuMonitorPtr mon); + int qemuMonitorStartCPUs(qemuMonitorPtr mon, virConnectPtr conn); @@ -239,6 +243,7 @@ typedef enum { QEMU_MONITOR_VM_STATUS_SAVE_VM, QEMU_MONITOR_VM_STATUS_SHUTDOWN, QEMU_MONITOR_VM_STATUS_WATCHDOG, + QEMU_MONITOR_VM_STATUS_GUEST_PANICKED, QEMU_MONITOR_VM_STATUS_LAST } qemuMonitorVMStatus; diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 2b73884..b6efa52 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -74,6 +74,7 @@ static void qemuMonitorJSONHandleBlockJobCanceled(qemuMonitorPtr mon, virJSONVal static void qemuMonitorJSONHandleBlockJobReady(qemuMonitorPtr mon, virJSONValuePtr data); static void qemuMonitorJSONHandleBalloonChange(qemuMonitorPtr mon, virJSONValuePtr data); static void qemuMonitorJSONHandlePMSuspendDisk(qemuMonitorPtr mon, virJSONValuePtr data); +static void qemuMonitorJSONHandleGUESTPanicked(qemuMonitorPtr mon, virJSONValuePtr data); typedef struct { const char *type; @@ -87,6 +88,7 @@ static qemuEventHandler eventHandlers[] = { { "BLOCK_JOB_COMPLETED", qemuMonitorJSONHandleBlockJobCompleted, }, { "BLOCK_JOB_READY", qemuMonitorJSONHandleBlockJobReady, }, { "DEVICE_TRAY_MOVED", qemuMonitorJSONHandleTrayChange, }, + { "GUEST_PANICKED", qemuMonitorJSONHandleGUESTPanicked, }, { "POWERDOWN", qemuMonitorJSONHandlePowerdown, }, { "RESET", qemuMonitorJSONHandleReset, }, { "RESUME", qemuMonitorJSONHandleResume, }, @@ -593,6 +595,11 @@ static void qemuMonitorJSONHandleResume(qemuMonitorPtr mon, virJSONValuePtr data qemuMonitorEmitResume(mon); } +static void qemuMonitorJSONHandleGUESTPanicked(qemuMonitorPtr mon, virJSONValuePtr data ATTRIBUTE_UNUSED) +{ + qemuMonitorEmitGUESTPanicked(mon); +} + static void qemuMonitorJSONHandleRTCChange(qemuMonitorPtr mon, virJSONValuePtr data) { long long offset = 0; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index d4fd4fb..29c8c4b 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -548,6 +548,7 @@ qemuProcessFakeReboot(void *opaque) qemuDomainObjPrivatePtr priv = vm->privateData; virDomainEventPtr event = NULL; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED; int ret = -1; VIR_DEBUG("vm=%p", vm); virObjectLock(vm); @@ -573,8 +574,11 @@ qemuProcessFakeReboot(void *opaque) goto endjob; } + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_CRASHED) + reason = VIR_DOMAIN_RUNNING_CRASHED; + if (qemuProcessStartCPUs(driver, vm, NULL, - VIR_DOMAIN_RUNNING_BOOTED, + reason, QEMU_ASYNC_JOB_NONE) < 0) { if (virGetLastError() == NULL) virReportError(VIR_ERR_INTERNAL_ERROR, @@ -1269,6 +1273,98 @@ qemuProcessHandlePMSuspendDisk(qemuMonitorPtr mon ATTRIBUTE_UNUSED, return 0; } +static int +qemuProcessHandleGUESTPanicked(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{ + virQEMUDriverPtr driver = qemu_driver; + qemuDomainObjPrivatePtr priv; + virDomainEventPtr event = NULL; + bool isReboot = true; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + + VIR_DEBUG("vm=%p", vm); + + virObjectLock(vm); + + if (!virDomainObjIsActive(vm)) { + VIR_DEBUG("Ignoring GUEST_PANICKED event from inactive domain %s", + vm->def->name); + goto cleanup; + } + + priv = vm->privateData; + + virDomainObjSetState(vm, + VIR_DOMAIN_CRASHED, + VIR_DOMAIN_CRASHED_PANICKED); + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_CRASHED, + VIR_DOMAIN_EVENT_CRASHED_PANICKED); + + if (vm->def->onCrash == VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE) { + VIR_FREE(priv->lockState); + + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + + if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) { + VIR_WARN("Unable to save status on vm %s after state change", + vm->def->name); + } + + goto cleanup; + } + + if (vm->def->onCrash == VIR_DOMAIN_LIFECYCLE_CRASH_DESTROY) { + isReboot = false; + VIR_INFO("Domain on_crash setting overridden, shutting down"); + } + + qemuDomainSetFakeReboot(driver, vm, isReboot); + + if (isReboot) { + qemuProcessShutdownOrReboot(driver, vm); + } else { + priv->beingDestroyed = true; + + if (qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE) < 0) { + priv->beingDestroyed = false; + goto cleanup; + } + + priv->beingDestroyed = false; + + if (! virDomainObjIsActive(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0); + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_CRASHED); + + virDomainAuditStop(vm, "destroyed"); + + if (! vm->persistent) { + qemuDomainRemoveInactive(driver, vm); + vm = NULL; + } + } + +cleanup: + if (vm) + virObjectUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + virObjectUnref(cfg); + + return 0; +} static qemuMonitorCallbacks monitorCallbacks = { .destroy = qemuProcessHandleMonitorDestroy, @@ -1289,6 +1385,7 @@ static qemuMonitorCallbacks monitorCallbacks = { .domainPMSuspend = qemuProcessHandlePMSuspend, .domainBalloonChange = qemuProcessHandleBalloonChange, .domainPMSuspendDisk = qemuProcessHandlePMSuspendDisk, + .domainGUESTPanicked = qemuProcessHandleGUESTPanicked, }; static int @@ -2673,6 +2770,10 @@ qemuProcessUpdateState(virQEMUDriverPtr driver, virDomainObjPtr vm) newState = VIR_DOMAIN_SHUTDOWN; newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN; ignore_value(VIR_STRDUP_QUIET(msg, "shutdown")); + } else if (reason == VIR_DOMAIN_PAUSED_GUEST_PANICKED) { + newState = VIR_DOMAIN_CRASHED; + newReason = VIR_DOMAIN_CRASHED_PANICKED; + ignore_value(VIR_STRDUP_QUIET(msg, "was crashed")); } else { newState = VIR_DOMAIN_PAUSED; newReason = reason; -- 1.8.1.4

On 06/05/2013 03:54 AM, Chen Fan wrote:
Add monitor callback API domainGUESTPanicked, that implements the 'on_crash' behavior in the XML when domain crashed. --- src/qemu/qemu_monitor.c | 14 +++++- src/qemu/qemu_monitor.h | 5 +++ src/qemu/qemu_monitor_json.c | 7 +++ src/qemu/qemu_process.c | 103 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 127 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 4e35f79..e0cd62c 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -113,7 +113,7 @@ VIR_ENUM_IMPL(qemuMonitorVMStatus, QEMU_MONITOR_VM_STATUS_LAST, "debug", "inmigrate", "internal-error", "io-error", "paused", "postmigrate", "prelaunch", "finish-migrate", "restore-vm", - "running", "save-vm", "shutdown", "watchdog") + "running", "save-vm", "shutdown", "watchdog", "guest-panicked")
If this is internal-only, can we use the shorter name 'guest-panic'?
typedef enum { QEMU_MONITOR_BLOCK_IO_STATUS_OK, @@ -1032,6 +1032,15 @@ int qemuMonitorEmitResume(qemuMonitorPtr mon) }
+int qemuMonitorEmitGUESTPanicked(qemuMonitorPtr mon)
WHY THE ALL CAPS? qemuMonitorEmitGuestPanic() is sufficient for naming purposes.
+{ + int ret = -1; + VIR_DEBUG("mon=%p", mon); + QEMU_MONITOR_CALLBACK(mon, ret, domainGUESTPanicked, mon->vm); + return ret; +} + + int qemuMonitorEmitRTCChange(qemuMonitorPtr mon, long long offset)
If it was because of copy-and-paste, remember that RTC is an acronym, but Guest is not.
+++ b/src/qemu/qemu_monitor.h @@ -140,6 +140,8 @@ struct _qemuMonitorCallbacks { unsigned long long actual); int (*domainPMSuspendDisk)(qemuMonitorPtr mon, virDomainObjPtr vm); + int (*domainGUESTPanicked)(qemuMonitorPtr mon,
Again, s/GUEST/Guest/
+ virDomainObjPtr vm); };
char *qemuMonitorEscapeArg(const char *in); @@ -220,6 +222,8 @@ int qemuMonitorEmitBlockJob(qemuMonitorPtr mon, int qemuMonitorEmitBalloonChange(qemuMonitorPtr mon, unsigned long long actual); int qemuMonitorEmitPMSuspendDisk(qemuMonitorPtr mon); +int qemuMonitorEmitGUESTPanicked(qemuMonitorPtr mon);
and again.
+++ b/src/qemu/qemu_monitor_json.c @@ -74,6 +74,7 @@ static void qemuMonitorJSONHandleBlockJobCanceled(qemuMonitorPtr mon, virJSONVal static void qemuMonitorJSONHandleBlockJobReady(qemuMonitorPtr mon, virJSONValuePtr data); static void qemuMonitorJSONHandleBalloonChange(qemuMonitorPtr mon, virJSONValuePtr data); static void qemuMonitorJSONHandlePMSuspendDisk(qemuMonitorPtr mon, virJSONValuePtr data); +static void qemuMonitorJSONHandleGUESTPanicked(qemuMonitorPtr mon, virJSONValuePtr data);
and again
+static int +qemuProcessHandleGUESTPanicked(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{
+ + if (vm->def->onCrash == VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE) { ... + goto cleanup; + } + + if (vm->def->onCrash == VIR_DOMAIN_LIFECYCLE_CRASH_DESTROY) {
Worth doing this as 'else if'?
+ isReboot = false; + VIR_INFO("Domain on_crash setting overridden, shutting down");
That sounds like fake reboot is overriding on_crash settings. I'd word this: on_crash overrides fake reboot request, shutting down instead and maybe be careful to only output that info message only if isReboot actually changed from true to false.
+ + if (! virDomainObjIsActive(vm)) {
Generally no space after '!'.
@@ -2673,6 +2770,10 @@ qemuProcessUpdateState(virQEMUDriverPtr driver, virDomainObjPtr vm) newState = VIR_DOMAIN_SHUTDOWN; newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN; ignore_value(VIR_STRDUP_QUIET(msg, "shutdown")); + } else if (reason == VIR_DOMAIN_PAUSED_GUEST_PANICKED) { + newState = VIR_DOMAIN_CRASHED; + newReason = VIR_DOMAIN_CRASHED_PANICKED; + ignore_value(VIR_STRDUP_QUIET(msg, "was crashed"));
s/was // Overall, the mechanics seem like they will work, but the spelling to use Guest instead of GUEST in function names probably warrants a respin. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Add 'virsh domstate --reason' command is effective when domain crashed. --- tools/virsh-domain-monitor.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/virsh-domain-monitor.c b/tools/virsh-domain-monitor.c index 5ed89d1..4bddefe 100644 --- a/tools/virsh-domain-monitor.c +++ b/tools/virsh-domain-monitor.c @@ -192,6 +192,8 @@ vshDomainStateReasonToString(int state, int reason) return N_("save canceled"); case VIR_DOMAIN_RUNNING_WAKEUP: return N_("event wakeup"); + case VIR_DOMAIN_RUNNING_CRASHED: + return N_("from crashed"); case VIR_DOMAIN_RUNNING_UNKNOWN: case VIR_DOMAIN_RUNNING_LAST: ; @@ -226,6 +228,8 @@ vshDomainStateReasonToString(int state, int reason) return N_("shutting down"); case VIR_DOMAIN_PAUSED_SNAPSHOT: return N_("creating snapshot"); + case VIR_DOMAIN_PAUSED_GUEST_PANICKED: + return N_("guest panicked"); case VIR_DOMAIN_PAUSED_UNKNOWN: case VIR_DOMAIN_PAUSED_LAST: ; @@ -236,6 +240,8 @@ vshDomainStateReasonToString(int state, int reason) switch ((virDomainShutdownReason) reason) { case VIR_DOMAIN_SHUTDOWN_USER: return N_("user"); + case VIR_DOMAIN_SHUTDOWN_CRASHED: + return N_("crashed"); case VIR_DOMAIN_SHUTDOWN_UNKNOWN: case VIR_DOMAIN_SHUTDOWN_LAST: ; @@ -266,6 +272,8 @@ vshDomainStateReasonToString(int state, int reason) case VIR_DOMAIN_CRASHED: switch ((virDomainCrashedReason) reason) { + case VIR_DOMAIN_CRASHED_PANICKED: + return N_("panicked"); case VIR_DOMAIN_CRASHED_UNKNOWN: case VIR_DOMAIN_CRASHED_LAST: ; -- 1.8.1.4

On 06/05/2013 03:54 AM, Chen Fan wrote:
Add 'virsh domstate --reason' command is effective when domain crashed. --- tools/virsh-domain-monitor.c | 8 ++++++++ 1 file changed, 8 insertions(+)
This should probably be squashed in with your 1/5 patch that introduces the new enum values in the first place. In fact, patch 1/5 fails to compile with decent gcc: virsh-domain-monitor.c: In function 'vshDomainStateReasonToString': virsh-domain-monitor.c:178:9: error: enumeration value 'VIR_DOMAIN_RUNNING_CRASHED' not handled in switch [-Werror=switch] virsh-domain-monitor.c:210:9: error: enumeration value 'VIR_DOMAIN_PAUSED_GUEST_PANICKED' not handled in switch [-Werror=switch] virsh-domain-monitor.c:236:9: error: enumeration value 'VIR_DOMAIN_SHUTDOWN_CRASHED' not handled in switch [-Werror=switch] virsh-domain-monitor.c:268:9: error: enumeration value 'VIR_DOMAIN_CRASHED_PANICKED' not handled in switch [-Werror=switch] virsh-domain-monitor.c: At top level: cc1: error: unrecognized command line option "-Wno-unused-command-line-argument" [-Werror] cc1: all warnings being treated as errors and ALL patches in the series should be independently compilable. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

We want to implement the oncrash actions through the thread Pool. so need to define several types for Implementing that options in watchdogAction. --- src/conf/domain_conf.c | 4 ++++ src/conf/domain_conf.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f577fd4..1340350 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -477,6 +477,10 @@ VIR_ENUM_IMPL(virDomainWatchdogAction, VIR_DOMAIN_WATCHDOG_ACTION_LAST, "poweroff", "pause", "dump", + "destroy after crashdump", + "reset after crashdump", + "destroy after crash", + "reset after crash", "none") VIR_ENUM_IMPL(virDomainVideo, VIR_DOMAIN_VIDEO_TYPE_LAST, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 3a71d6c..bd05c2e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1236,6 +1236,10 @@ enum virDomainWatchdogAction { VIR_DOMAIN_WATCHDOG_ACTION_POWEROFF, VIR_DOMAIN_WATCHDOG_ACTION_PAUSE, VIR_DOMAIN_WATCHDOG_ACTION_DUMP, + VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY, /* destroy after crashdump */ + VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET, /* reset after crashdump */ + VIR_DOMAIN_WATCHDOG_ACTION_CRASH_DESTROY, /* destroy after crash dircetly */ + VIR_DOMAIN_WATCHDOG_ACTION_CRASH_RESET, /* reset after crash directly */ VIR_DOMAIN_WATCHDOG_ACTION_NONE, VIR_DOMAIN_WATCHDOG_ACTION_LAST -- 1.8.1.4

On Wed, Jun 05, 2013 at 05:54:59PM +0800, Chen Fan wrote:
We want to implement the oncrash actions through the thread Pool. so need to define several types for Implementing that options in watchdogAction. --- src/conf/domain_conf.c | 4 ++++ src/conf/domain_conf.h | 4 ++++ 2 files changed, 8 insertions(+)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f577fd4..1340350 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -477,6 +477,10 @@ VIR_ENUM_IMPL(virDomainWatchdogAction, VIR_DOMAIN_WATCHDOG_ACTION_LAST, "poweroff", "pause", "dump", + "destroy after crashdump", + "reset after crashdump", + "destroy after crash", + "reset after crash", "none")
VIR_ENUM_IMPL(virDomainVideo, VIR_DOMAIN_VIDEO_TYPE_LAST, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 3a71d6c..bd05c2e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1236,6 +1236,10 @@ enum virDomainWatchdogAction { VIR_DOMAIN_WATCHDOG_ACTION_POWEROFF, VIR_DOMAIN_WATCHDOG_ACTION_PAUSE, VIR_DOMAIN_WATCHDOG_ACTION_DUMP, + VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY, /* destroy after crashdump */ + VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET, /* reset after crashdump */ + VIR_DOMAIN_WATCHDOG_ACTION_CRASH_DESTROY, /* destroy after crash dircetly */ + VIR_DOMAIN_WATCHDOG_ACTION_CRASH_RESET, /* reset after crash directly */ VIR_DOMAIN_WATCHDOG_ACTION_NONE,
VIR_DOMAIN_WATCHDOG_ACTION_LAST
NACK to this. The guest panick notification is a completely separate mechanism to the guest watchdog device. We should not be overloading watchdog actions for panick handling. Guest panic handling is defined for Xen via the <on_crash>...</on_crash> lifecycle action, and we should use this same syntax for KVM. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Through the watchdog actions, we can implement the docoredump func, we rewrite the processWatchdogEvent function to serval independent functions, so we move the previous implementation of the destroy and restart code to here. then the code looks like easy. --- src/qemu/qemu_driver.c | 197 +++++++++++++++++++++++++++++++++++++----------- src/qemu/qemu_process.c | 118 +++++++++++++---------------- src/qemu/qemu_process.h | 2 + 3 files changed, 208 insertions(+), 109 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4a76f14..4743539 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3441,76 +3441,183 @@ cleanup: return ret; } +static int +qemuProcessWatchdogCrashEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int watchDogAction) +{ + int ret = -1; + qemuDomainObjPrivatePtr priv = vm->privateData; + virDomainEventPtr event = NULL; + bool isReset = false; + + if (watchDogAction == VIR_DOMAIN_WATCHDOG_ACTION_CRASH_RESET || + watchDogAction == VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET) + isReset = true; + + qemuDomainSetFakeReboot(driver, vm, isReset); + + if (isReset) { + qemuProcessShutdownOrReboot(driver, vm); + return 0; + } + + /* If isReset, then do follow. */ + priv->beingDestroyed = true; + + if (qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE) < 0) { + priv->beingDestroyed = false; + goto cleanup; + } + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_DESTROY) < 0) { + goto cleanup; + } + + priv->beingDestroyed = false; + + if (! virDomainObjIsActive(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0); + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_CRASHED); + + virDomainAuditStop(vm, "destroyed"); + + ret = 0; + +endjob: + /* Safe to ignore value since ref count was incremented in + * qemuProcessHandleWatchdog(). + */ + ignore_value(qemuDomainObjEndJob(driver, vm)); + +cleanup: + if (event) + qemuDomainEventQueue(driver, event); + return ret; +} + + +static int +qemuProcessWatchdogDumpEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int watchDogAction, + unsigned int flags) +{ + int ret = -1; + char *dumpfile = NULL; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + + if (virAsprintf(&dumpfile, "%s/%s-%u", + cfg->autoDumpPath, + vm->def->name, + (unsigned int)time(NULL)) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (qemuDomainObjBeginAsyncJob(driver, vm, + QEMU_ASYNC_JOB_DUMP) < 0) { + goto cleanup; + } + + if (!virDomainObjIsActive(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + flags |= cfg->autoDumpBypassCache ? VIR_DUMP_BYPASS_CACHE: 0; + + ret = doCoreDump(driver, vm, dumpfile, + getCompressionType(driver), flags); + if (ret < 0) + virReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("Dump failed")); + + if (watchDogAction == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) { + ret = qemuProcessStartCPUs(driver, vm, NULL, + VIR_DOMAIN_RUNNING_UNPAUSED, + QEMU_ASYNC_JOB_DUMP); + + if (ret < 0) + virReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("Resuming after dump failed")); + } + +endjob: + /* Safe to ignore value since ref count was incremented in + * qemuProcessHandleWatchdog(). + */ + ignore_value(qemuDomainObjEndAsyncJob(driver, vm)); + +cleanup: + VIR_FREE(dumpfile); + virObjectUnref(cfg); + return ret; +} + static void processWatchdogEvent(void *data, void *opaque) { int ret; struct qemuDomainWatchdogEvent *wdEvent = data; virQEMUDriverPtr driver = opaque; - virQEMUDriverConfigPtr cfg; virObjectLock(wdEvent->vm); - cfg = virQEMUDriverGetConfig(driver); switch (wdEvent->action) { case VIR_DOMAIN_WATCHDOG_ACTION_DUMP: { - char *dumpfile; unsigned int flags = 0; + qemuProcessWatchdogDumpEvent(driver, wdEvent->vm, wdEvent->action, flags); + } + break; + case VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY: + case VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET: + { + unsigned int flags = VIR_DUMP_MEMORY_ONLY; - if (virAsprintf(&dumpfile, "%s/%s-%u", - cfg->autoDumpPath, - wdEvent->vm->def->name, - (unsigned int)time(NULL)) < 0) { - virReportOOMError(); - goto unlock; - } - - if (qemuDomainObjBeginAsyncJob(driver, wdEvent->vm, - QEMU_ASYNC_JOB_DUMP) < 0) { - VIR_FREE(dumpfile); - goto unlock; - } + ret = qemuProcessWatchdogDumpEvent(driver, wdEvent->vm, wdEvent->action, flags); - if (!virDomainObjIsActive(wdEvent->vm)) { - virReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("domain is not running")); - VIR_FREE(dumpfile); - goto endjob; - } - - flags |= cfg->autoDumpBypassCache ? VIR_DUMP_BYPASS_CACHE: 0; - ret = doCoreDump(driver, wdEvent->vm, dumpfile, - getCompressionType(driver), flags); if (ret < 0) - virReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("Dump failed")); + goto unlock; - ret = qemuProcessStartCPUs(driver, wdEvent->vm, NULL, - VIR_DOMAIN_RUNNING_UNPAUSED, - QEMU_ASYNC_JOB_DUMP); + qemuProcessWatchdogCrashEvent(driver, wdEvent->vm, wdEvent->action); - if (ret < 0) - virReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("Resuming after dump failed")); - - VIR_FREE(dumpfile); + if (wdEvent->action == VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY) { + if (!wdEvent->vm->persistent) { + qemuDomainRemoveInactive(driver, wdEvent->vm); + wdEvent->vm = NULL; + } + } + } + break; + case VIR_DOMAIN_WATCHDOG_ACTION_CRASH_RESET: + case VIR_DOMAIN_WATCHDOG_ACTION_CRASH_DESTROY: + qemuProcessWatchdogCrashEvent(driver, wdEvent->vm, wdEvent->action); + if (wdEvent->action == VIR_DOMAIN_WATCHDOG_ACTION_CRASH_DESTROY) { + if (!wdEvent->vm->persistent) { + qemuDomainRemoveInactive(driver, wdEvent->vm); + wdEvent->vm = NULL; + } } break; default: goto unlock; } -endjob: - /* Safe to ignore value since ref count was incremented in - * qemuProcessHandleWatchdog(). - */ - ignore_value(qemuDomainObjEndAsyncJob(driver, wdEvent->vm)); - unlock: - virObjectUnlock(wdEvent->vm); - virObjectUnref(wdEvent->vm); + if (wdEvent->vm) { + virObjectUnlock(wdEvent->vm); + virObjectUnref(wdEvent->vm); + } VIR_FREE(wdEvent); - virObjectUnref(cfg); } static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 29c8c4b..9100a67 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -615,7 +615,7 @@ cleanup: } -static void +void qemuProcessShutdownOrReboot(virQEMUDriverPtr driver, virDomainObjPtr vm) { @@ -816,6 +816,27 @@ qemuProcessHandleRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED, return 0; } +static void sendWatchDogEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int action) +{ + struct qemuDomainWatchdogEvent *wdEvent = NULL; + if (VIR_ALLOC(wdEvent) == 0) { + wdEvent->action = action; + wdEvent->vm = vm; + /* Hold an extra reference because we can't allow 'vm' to be + * deleted before handling watchdog event is finished. + */ + virObjectRef(vm); + if (virThreadPoolSendJob(driver->workerPool, 0, wdEvent) < 0) { + if (!virObjectUnref(vm)) + vm = NULL; + VIR_FREE(wdEvent); + } + } else { + virReportOOMError(); + } +} static int qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, @@ -852,22 +873,7 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, } if (vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) { - struct qemuDomainWatchdogEvent *wdEvent; - if (VIR_ALLOC(wdEvent) == 0) { - wdEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP; - wdEvent->vm = vm; - /* Hold an extra reference because we can't allow 'vm' to be - * deleted before handling watchdog event is finished. - */ - virObjectRef(vm); - if (virThreadPoolSendJob(driver->workerPool, 0, wdEvent) < 0) { - if (!virObjectUnref(vm)) - vm = NULL; - VIR_FREE(wdEvent); - } - } else { - virReportOOMError(); - } + sendWatchDogEvent(driver, vm, VIR_DOMAIN_WATCHDOG_ACTION_DUMP); } if (vm) @@ -1273,6 +1279,7 @@ qemuProcessHandlePMSuspendDisk(qemuMonitorPtr mon ATTRIBUTE_UNUSED, return 0; } + static int qemuProcessHandleGUESTPanicked(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virDomainObjPtr vm) @@ -1280,8 +1287,8 @@ qemuProcessHandleGUESTPanicked(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virQEMUDriverPtr driver = qemu_driver; qemuDomainObjPrivatePtr priv; virDomainEventPtr event = NULL; - bool isReboot = true; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + int watchDogAction = VIR_DOMAIN_WATCHDOG_ACTION_NONE; VIR_DEBUG("vm=%p", vm); @@ -1295,6 +1302,9 @@ qemuProcessHandleGUESTPanicked(qemuMonitorPtr mon ATTRIBUTE_UNUSED, priv = vm->privateData; + VIR_DEBUG("Transitioned guest %s to crashed state for panic", + vm->def->name); + virDomainObjSetState(vm, VIR_DOMAIN_CRASHED, VIR_DOMAIN_CRASHED_PANICKED); @@ -1303,58 +1313,38 @@ qemuProcessHandleGUESTPanicked(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_CRASHED, VIR_DOMAIN_EVENT_CRASHED_PANICKED); - if (vm->def->onCrash == VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE) { - VIR_FREE(priv->lockState); - - if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) - VIR_WARN("Unable to release lease on %s", vm->def->name); - VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + VIR_FREE(priv->lockState); - if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) { - VIR_WARN("Unable to save status on vm %s after state change", - vm->def->name); - } + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); - goto cleanup; + if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) { + VIR_WARN("Unable to save status on vm %s after state change", + vm->def->name); } - if (vm->def->onCrash == VIR_DOMAIN_LIFECYCLE_CRASH_DESTROY) { - isReboot = false; - VIR_INFO("Domain on_crash setting overridden, shutting down"); + switch (vm->def->onCrash) { + case VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE: + break; + case VIR_DOMAIN_LIFECYCLE_CRASH_DESTROY: + watchDogAction = VIR_DOMAIN_WATCHDOG_ACTION_CRASH_DESTROY; + break; + case VIR_DOMAIN_LIFECYCLE_CRASH_COREDUMP_DESTROY: + watchDogAction = VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY; + break; + case VIR_DOMAIN_LIFECYCLE_CRASH_RESTART_RENAME: + /* unrealized */ + break; + case VIR_DOMAIN_LIFECYCLE_CRASH_RESTART: + watchDogAction = VIR_DOMAIN_WATCHDOG_ACTION_CRASH_RESET; + break; + case VIR_DOMAIN_LIFECYCLE_CRASH_COREDUMP_RESTART: + watchDogAction = VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET; + break; } - qemuDomainSetFakeReboot(driver, vm, isReboot); - - if (isReboot) { - qemuProcessShutdownOrReboot(driver, vm); - } else { - priv->beingDestroyed = true; - - if (qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE) < 0) { - priv->beingDestroyed = false; - goto cleanup; - } - - priv->beingDestroyed = false; - - if (! virDomainObjIsActive(vm)) { - virReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("domain is not running")); - goto cleanup; - } - - qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0); - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STOPPED, - VIR_DOMAIN_EVENT_STOPPED_CRASHED); - - virDomainAuditStop(vm, "destroyed"); - - if (! vm->persistent) { - qemuDomainRemoveInactive(driver, vm); - vm = NULL; - } - } + sendWatchDogEvent(driver, vm, watchDogAction); cleanup: if (vm) diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index d4768fc..9223e07 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -86,6 +86,8 @@ typedef enum { int qemuProcessKill(virDomainObjPtr vm, unsigned int flags); +void qemuProcessShutdownOrReboot(virQEMUDriverPtr driver, + virDomainObjPtr vm); int qemuProcessAutoDestroyInit(virQEMUDriverPtr driver); void qemuProcessAutoDestroyShutdown(virQEMUDriverPtr driver); int qemuProcessAutoDestroyAdd(virQEMUDriverPtr driver, -- 1.8.1.4

On 06/05/2013 03:55 AM, Chen Fan wrote:
Through the watchdog actions, we can implement the docoredump func, we rewrite the processWatchdogEvent function to serval independent functions, so we move the previous implementation of the destroy and restart code to here. then the code looks like easy. --- src/qemu/qemu_driver.c | 197 +++++++++++++++++++++++++++++++++++++----------- src/qemu/qemu_process.c | 118 +++++++++++++---------------- src/qemu/qemu_process.h | 2 + 3 files changed, 208 insertions(+), 109 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4a76f14..4743539 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3441,76 +3441,183 @@ cleanup: return ret; }
+static int +qemuProcessWatchdogCrashEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int watchDogAction)
Indentation is off. Just as Dan complained on 4/5, guest panic and watchdog events are NOT the same; if you are going to have both call into common code, then the common code needs to be named something that fits the scenario correctly.
+ + if (isReset) { + qemuProcessShutdownOrReboot(driver, vm); + return 0; + } + + /* If isReset, then do follow. */
This comment doesn't make sense with the earlier code; if isReset, then we've already exited. I'd just delete the comment, as it doesn't add anything.
+static int +qemuProcessWatchdogDumpEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int watchDogAction, + unsigned int flags) +{ + int ret = -1; + char *dumpfile = NULL; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + + if (virAsprintf(&dumpfile, "%s/%s-%u", + cfg->autoDumpPath, + vm->def->name, + (unsigned int)time(NULL)) < 0) {
This risks truncation of a 64-bit result from time().
+ case VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY: + case VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET: + { + unsigned int flags = VIR_DUMP_MEMORY_ONLY;
- if (virAsprintf(&dumpfile, "%s/%s-%u", - cfg->autoDumpPath, - wdEvent->vm->def->name, - (unsigned int)time(NULL)) < 0) {
then again, you are just doing code motion from earlier bad code. If you are going to do code motion, it's best to separate the refactoring into one commit, and then the new use of the refactored code in the next commit, rather than trying to combine the two steps into one commit. That is, it is easier for a reviewer to see that all you did in the first commit was pull things into their own function, rather than trying to figure out which part of the commit is new or movement.
+++ b/src/qemu/qemu_process.c @@ -615,7 +615,7 @@ cleanup: }
-static void +void qemuProcessShutdownOrReboot(virQEMUDriverPtr driver, virDomainObjPtr vm) { @@ -816,6 +816,27 @@ qemuProcessHandleRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED, return 0; }
+static void sendWatchDogEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int action)
Indentation is off. Although we aren't very consistent in existing functions, our style for new functions is: static void sendWatchDogEvent(virQEMUDriverPtr driver, virDomainObjPtr vm, int action) My review was rather cursory, so there may be another round of meat to fix. In general, I'm grateful that you are working on adding this feature, and hope that it is in better shape by the time we are ready for freeze for 1.0.7 :) -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Thx for your detailed reviewer, I will make correction as you pointed out. On Wed, 2013-06-05 at 16:54 -0600, Eric Blake wrote:
On 06/05/2013 03:55 AM, Chen Fan wrote:
Through the watchdog actions, we can implement the docoredump func, we rewrite the processWatchdogEvent function to serval independent functions, so we move the previous implementation of the destroy and restart code to here. then the code looks like easy. --- src/qemu/qemu_driver.c | 197 +++++++++++++++++++++++++++++++++++++----------- src/qemu/qemu_process.c | 118 +++++++++++++---------------- src/qemu/qemu_process.h | 2 + 3 files changed, 208 insertions(+), 109 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4a76f14..4743539 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3441,76 +3441,183 @@ cleanup: return ret; }
+static int +qemuProcessWatchdogCrashEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int watchDogAction)
Indentation is off.
Just as Dan complained on 4/5, guest panic and watchdog events are NOT the same; if you are going to have both call into common code, then the common code needs to be named something that fits the scenario correctly.
+ + if (isReset) { + qemuProcessShutdownOrReboot(driver, vm); + return 0; + } + + /* If isReset, then do follow. */
This comment doesn't make sense with the earlier code; if isReset, then we've already exited. I'd just delete the comment, as it doesn't add anything.
+static int +qemuProcessWatchdogDumpEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int watchDogAction, + unsigned int flags) +{ + int ret = -1; + char *dumpfile = NULL; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + + if (virAsprintf(&dumpfile, "%s/%s-%u", + cfg->autoDumpPath, + vm->def->name, + (unsigned int)time(NULL)) < 0) {
This risks truncation of a 64-bit result from time().
+ case VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_DESTROY: + case VIR_DOMAIN_WATCHDOG_ACTION_CRASHDUMP_RESET: + { + unsigned int flags = VIR_DUMP_MEMORY_ONLY;
- if (virAsprintf(&dumpfile, "%s/%s-%u", - cfg->autoDumpPath, - wdEvent->vm->def->name, - (unsigned int)time(NULL)) < 0) {
then again, you are just doing code motion from earlier bad code.
If you are going to do code motion, it's best to separate the refactoring into one commit, and then the new use of the refactored code in the next commit, rather than trying to combine the two steps into one commit. That is, it is easier for a reviewer to see that all you did in the first commit was pull things into their own function, rather than trying to figure out which part of the commit is new or movement.
+++ b/src/qemu/qemu_process.c @@ -615,7 +615,7 @@ cleanup: }
-static void +void qemuProcessShutdownOrReboot(virQEMUDriverPtr driver, virDomainObjPtr vm) { @@ -816,6 +816,27 @@ qemuProcessHandleRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED, return 0; }
+static void sendWatchDogEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + int action)
Indentation is off. Although we aren't very consistent in existing functions, our style for new functions is:
static void sendWatchDogEvent(virQEMUDriverPtr driver, virDomainObjPtr vm, int action)
My review was rather cursory, so there may be another round of meat to fix. In general, I'm grateful that you are working on adding this feature, and hope that it is in better shape by the time we are ready for freeze for 1.0.7 :)
participants (5)
-
Chen Fan
-
chenfan
-
Daniel Hansel
-
Daniel P. Berrange
-
Eric Blake