[libvirt] [PATCH] virlog: Add stack trace log when libvirt receives fatal signal

An earlier commit(c0c8c1) Dan removed global log buffer feature entirely because of duplicate log messages. An improvement is introduced. That is dumping stack trace instead of log buffer upon libvirt crash. Signed-off-by: Zhou Yimin <zhouyimin@huawei.com> Signed-off-by: Wang Rui <moon.wangrui@huawei.com> --- When libvirt receives fatal signal, the log output shows as following. Caught abort signal dumping stack trace: ====== start of stack trace ===== /usr/lib64/libvirt.so.0(+0x1d4fde)[0x7f001b1b0fde] /lib64/libpthread.so.0(+0xf800)[0x7f00182ed800] /lib64/libc.so.6(gsignal+0x35)[0x7f0017b91b55] /lib64/libc.so.6(abort+0x181)[0x7f0017b93131] /lib64/libc.so.6(+0x70e0f)[0x7f0017bcfe0f] /lib64/libc.so.6(+0x76618)[0x7f0017bd5618] /lib64/libc.so.6(+0x76c3f)[0x7f0017bd5c3f] /lib64/libc.so.6(+0x78fee)[0x7f0017bd7fee] /lib64/libc.so.6(__libc_malloc+0x77)[0x7f0017bda747] /usr/lib64/libvirt.so.0(virReallocN+0x65)[0x7f001b03c06d] /usr/lib64/libvirt.so.0(+0x7ebbc)[0x7f001b05abbc] /usr/lib64/libvirt.so.0(virFileReadLimFD+0x50)[0x7f001b05ad4a] /usr/lib64/libvirt.so.0(virFileReadAll+0xa6)[0x7f001b05c7d1] /usr/lib64/libvirt.so.0(virProcessGetStartTime+0x9a)[0x7f001b080ee9] /usr/lib64/libvirt.so.0(virNetSocketGetUNIXIdentity+0xc1)[0x7f001b1bf709] /usr/lib64/libvirt.so.0(virNetServerClientGetUNIXIdentity+0x68)[0x7f001b1b35b6] /usr/sbin/libvirtd(+0x20103)[0x7f001bb8f103] /usr/sbin/libvirtd(+0x20347)[0x7f001bb8f347] /usr/lib64/libvirt.so.0(+0x1db9bd)[0x7f001b1b79bd] /usr/lib64/libvirt.so.0(virNetServerProgramDispatch+0x209)[0x7f001b1b7dff] /usr/lib64/libvirt.so.0(+0x1d5448)[0x7f001b1b1448] /usr/lib64/libvirt.so.0(+0x1d572a)[0x7f001b1b172a] /usr/lib64/libvirt.so.0(+0xb3964)[0x7f001b08f964] /usr/lib64/libvirt.so.0(+0xb2dd0)[0x7f001b08edd0] /lib64/libpthread.so.0(+0x77f6)[0x7f00182e57f6] /lib64/libc.so.6(clone+0x6d)[0x7f0017c3a09d] ====== end of stack trace ===== src/libvirt_private.syms | 1 + src/rpc/virnetserver.c | 43 +++++++++++++++++++++++ src/util/virlog.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virlog.h | 2 ++ 4 files changed, 134 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index c3332c9..d781595 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1501,6 +1501,7 @@ virLogGetFilters; virLogGetNbFilters; virLogGetNbOutputs; virLogGetOutputs; +virLogEmergencyDumpAll; virLogLock; virLogMessage; virLogParseDefaultPriority; diff --git a/src/rpc/virnetserver.c b/src/rpc/virnetserver.c index 7e3fc0a..2845098 100644 --- a/src/rpc/virnetserver.c +++ b/src/rpc/virnetserver.c @@ -340,6 +340,32 @@ static int virNetServerDispatchNewClient(virNetServerServicePtr svc, return 0; } +static void +virNetServerFatalSignal(int sig, siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + struct sigaction sig_action; + int origerrno; + + origerrno = errno; + virLogEmergencyDumpAll(sig); + + /* + * If the signal is fatal, avoid looping over this handler + * by deactivating it + */ +#ifdef SIGUSR2 + if (sig != SIGUSR2) { +#endif + memset(&sig_action, 0, sizeof(sig_action)); + sig_action.sa_handler = SIG_DFL; + sigaction(sig, &sig_action, NULL); + raise(sig); +#ifdef SIGUSR2 + } +#endif + errno = origerrno; +} virNetServerPtr virNetServerNew(size_t min_workers, size_t max_workers, @@ -401,6 +427,23 @@ virNetServerPtr virNetServerNew(size_t min_workers, sig_action.sa_handler = SIG_IGN; sigaction(SIGPIPE, &sig_action, NULL); + /* + * catch fatal errors to dump stack trace, also hook to USR2 for dynamic + * debugging purposes or testing + */ + sig_action.sa_sigaction = virNetServerFatalSignal; + sig_action.sa_flags = SA_SIGINFO; + sigaction(SIGFPE, &sig_action, NULL); + sigaction(SIGSEGV, &sig_action, NULL); + sigaction(SIGILL, &sig_action, NULL); + sigaction(SIGABRT, &sig_action, NULL); +#ifdef SIGBUS + sigaction(SIGBUS, &sig_action, NULL); +#endif +#ifdef SIGUSR2 + sigaction(SIGUSR2, &sig_action, NULL); +#endif + return srv; error: diff --git a/src/util/virlog.c b/src/util/virlog.c index 056950e..081b194 100644 --- a/src/util/virlog.c +++ b/src/util/virlog.c @@ -30,6 +30,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> +#include <signal.h> #include <execinfo.h> #include <regex.h> #if HAVE_SYSLOG_H @@ -661,6 +662,93 @@ virLogStackTraceToFd(int fd) } static void +virLogDumpAllFD(const char *msg, int len) +{ + size_t i; + bool found = false; + + if (len <= 0) + len = strlen(msg); + + for (i = 0; i < virLogNbOutputs; i++) { + if (virLogOutputs[i].f == virLogOutputToFd) { + int fd = (intptr_t) virLogOutputs[i].data; + + if (fd >= 0) { + if (msg) + ignore_value(safewrite(fd, msg, len)); + else + virLogStackTraceToFd(fd); + + found = true; + } + } + } + if (!found) { + if (msg) + ignore_value(safewrite(STDERR_FILENO, msg, len)); + else + virLogStackTraceToFd(STDERR_FILENO); + } +} + +/** + * virLogEmergencyDumpAll: + * @signum: the signal number + * + * Emergency function called, possibly from a signal handler. + * It need to output the stack trace through the log output + * which are safe to use from a signal handler. + * In case none is found it is emitted to standard error. + */ +void +virLogEmergencyDumpAll(int signum) +{ + switch (signum) { +#ifdef SIGFPE + case SIGFPE: + virLogDumpAllFD("Caught signal Floating-point exception", -1); + break; +#endif +#ifdef SIGSEGV + case SIGSEGV: + virLogDumpAllFD("Caught Segmentation violation", -1); + break; +#endif +#ifdef SIGILL + case SIGILL: + virLogDumpAllFD("Caught illegal instruction", -1); + break; +#endif +#ifdef SIGABRT + case SIGABRT: + virLogDumpAllFD("Caught abort signal", -1); + break; +#endif +#ifdef SIGBUS + case SIGBUS: + virLogDumpAllFD("Caught bus error", -1); + break; +#endif +#ifdef SIGUSR2 + case SIGUSR2: + virLogDumpAllFD("Caught User-defined signal 2", -1); + break; +#endif + default: + virLogDumpAllFD("Caught unexpected signal", -1); + break; + } + + virLogDumpAllFD(" dumping stack trace:\n", -1); + virLogDumpAllFD("\n\n ====== start of stack trace =====\n\n", -1); + + virLogDumpAllFD(NULL, 1); + + virLogDumpAllFD("\n\n ====== end of stack trace =====\n\n", -1); +} + +static void virLogOutputToFd(virLogSourcePtr source ATTRIBUTE_UNUSED, virLogPriority priority ATTRIBUTE_UNUSED, const char *filename ATTRIBUTE_UNUSED, diff --git a/src/util/virlog.h b/src/util/virlog.h index 5b38891..628a2b1 100644 --- a/src/util/virlog.h +++ b/src/util/virlog.h @@ -201,6 +201,8 @@ extern void virLogVMessage(virLogSourcePtr source, const char *fmt, va_list vargs) ATTRIBUTE_FMT_PRINTF(7, 0); +extern void virLogEmergencyDumpAll(int signum); + bool virLogProbablyLogMessage(const char *str); #endif -- 1.7.12.4

On Tue, May 20, 2014 at 08:59:37AM +0000, Wangrui (K) wrote:
An earlier commit(c0c8c1) Dan removed global log buffer feature entirely because of duplicate log messages. An improvement is introduced. That is dumping stack trace instead of log buffer upon libvirt crash.
While I understand the desire here...
+virLogDumpAllFD(const char *msg, int len) +{ + size_t i; + bool found = false; + + if (len <= 0) + len = strlen(msg); + + for (i = 0; i < virLogNbOutputs; i++) { + if (virLogOutputs[i].f == virLogOutputToFd) { + int fd = (intptr_t) virLogOutputs[i].data; + + if (fd >= 0) { + if (msg) + ignore_value(safewrite(fd, msg, len)); + else + virLogStackTraceToFd(fd); + + found = true; + } + } + } + if (!found) { + if (msg) + ignore_value(safewrite(STDERR_FILENO, msg, len)); + else + virLogStackTraceToFd(STDERR_FILENO);
This is not going to work. virLogStackTraceToFd invokes the backtrace/backtrace_symbols_fd functions which are not async signal safe. They are also not likely to be reliable to use when you have memory corruption triggering the signal. The 'abrt' program is commonly used on modern Linux distros to generate stack traces when processes crash / terminate abnormally. abrt has the added benefit that the stack traces it records include all function parameters and local variables. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Tue, May 20, 2014 at 10:05:19AM +0100, Daniel P. Berrange wrote:
On Tue, May 20, 2014 at 08:59:37AM +0000, Wangrui (K) wrote:
An earlier commit(c0c8c1) Dan removed global log buffer feature entirely because of duplicate log messages. An improvement is introduced. That is dumping stack trace instead of log buffer upon libvirt crash.
While I understand the desire here...
+virLogDumpAllFD(const char *msg, int len) +{ + size_t i; + bool found = false; + + if (len <= 0) + len = strlen(msg); + + for (i = 0; i < virLogNbOutputs; i++) { + if (virLogOutputs[i].f == virLogOutputToFd) { + int fd = (intptr_t) virLogOutputs[i].data; + + if (fd >= 0) { + if (msg) + ignore_value(safewrite(fd, msg, len)); + else + virLogStackTraceToFd(fd); + + found = true; + } + } + } + if (!found) { + if (msg) + ignore_value(safewrite(STDERR_FILENO, msg, len)); + else + virLogStackTraceToFd(STDERR_FILENO);
This is not going to work. virLogStackTraceToFd invokes the backtrace/backtrace_symbols_fd functions which are not async signal safe. They are also not likely to be reliable to use when you have memory corruption triggering the signal.
The 'abrt' program is commonly used on modern Linux distros to generate stack traces when processes crash / terminate abnormally. abrt has the added benefit that the stack traces it records include all function parameters and local variables.
And if you can't (or don't want to) have abrt then it is pretty easy to make a self-created solution using kernel.core_pattern in /etc/sysctl.conf
Regards, Daniel

-----Original Message----- From: Daniel P. Berrange [mailto:berrange@redhat.com] Sent: Tuesday, May 20, 2014 5:05 PM To: Wangrui (K) Cc: libvir-list@redhat.com; zhouyimin Zhou(Yimin); Yanqiangjun Subject: Re: [PATCH] virlog: Add stack trace log when libvirt receives fatal signal
On Tue, May 20, 2014 at 08:59:37AM +0000, Wangrui (K) wrote:
An earlier commit(c0c8c1) Dan removed global log buffer feature entirely because of duplicate log messages. An improvement is introduced. That is dumping stack trace instead of log buffer upon libvirt crash.
This is not going to work. virLogStackTraceToFd invokes the backtrace/backtrace_symbols_fd functions which are not async signal safe. They are also not likely to be reliable to use when you have memory corruption triggering the signal.
The 'abrt' program is commonly used on modern Linux distros to generate stack traces when processes crash / terminate abnormally. abrt has the added benefit that the stack traces it records include all function parameters and local variables.
Oops. I didn't realize Async-signal-(un)safe functions in virLogStackTraceToFd. And it seems that only kernel coredump can be used in this case. My original purpose is to trace stack on crash/obort and etc without using kernel.core_pattern (sometimes kernel coredump is not turned on ). Thanks.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Wed, May 21, 2014 at 09:37:35AM +0000, Wangrui (K) wrote:
-----Original Message----- From: Daniel P. Berrange [mailto:berrange@redhat.com] Sent: Tuesday, May 20, 2014 5:05 PM To: Wangrui (K) Cc: libvir-list@redhat.com; zhouyimin Zhou(Yimin); Yanqiangjun Subject: Re: [PATCH] virlog: Add stack trace log when libvirt receives fatal signal
On Tue, May 20, 2014 at 08:59:37AM +0000, Wangrui (K) wrote:
An earlier commit(c0c8c1) Dan removed global log buffer feature entirely because of duplicate log messages. An improvement is introduced. That is dumping stack trace instead of log buffer upon libvirt crash.
This is not going to work. virLogStackTraceToFd invokes the backtrace/backtrace_symbols_fd functions which are not async signal safe. They are also not likely to be reliable to use when you have memory corruption triggering the signal.
The 'abrt' program is commonly used on modern Linux distros to generate stack traces when processes crash / terminate abnormally. abrt has the added benefit that the stack traces it records include all function parameters and local variables.
Oops. I didn't realize Async-signal-(un)safe functions in virLogStackTraceToFd. And it seems that only kernel coredump can be used in this case. My original purpose is to trace stack on crash/obort and etc without using kernel.core_pattern (sometimes kernel coredump is not turned on ).
If you use '| /full/path/to/script_that_gets_coredump_on_stdin' for the kernel.core_pattern, it will work even when ulimit is turned off. In that script you can throw away all the coredumps you don't want and capture only those you want and only when you want. You can use default percent syntax to specify parameters for the script and base the decision on that. Martin
Thanks.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (3)
-
Daniel P. Berrange
-
Martin Kletzander
-
Wangrui (K)