[libvirt] [PATCH] Make locking more debuggable from logs

With this patch, there is new ./configure option '--enable-lock-debug' which controls whether we want to turn on debugging locks. This feature is exposed as a ./configure option due to its huge overhead in speed/log size and is only meant to be used with this in mind. It is designed in a way that even log from deadlocked daemon should provide enough info to find the deadlock itself. Every matching Lock() call can be matched with its Unlock() call and with every Lock() called it is visible whether it failed or not. Unlock() call follows the same output, even though unnecessary (the call cannot block). Lock logging is disabled while logging because that would either recurse or deadlock. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- configure.ac | 19 +++++++++++++++ src/Makefile.am | 3 ++- src/libvirt_private.syms | 1 + src/util/virlog.c | 8 +++++++ src/util/virthread.h | 4 ++++ src/util/virthreadpthread.c | 58 ++++++++++++++++++++++++++++++++++++++++++++- src/util/virutil.c | 40 +++++++++++++++++++++++++++++++ src/util/virutil.h | 2 ++ 8 files changed, 133 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b5af0d3..c63ffe7 100644 --- a/configure.ac +++ b/configure.ac @@ -478,6 +478,24 @@ if test x"$enable_debug" = x"yes"; then AC_DEFINE([ENABLE_DEBUG], [], [whether debugging is enabled]) fi +dnl --enable-lock-debug=(yes|no) +AC_ARG_ENABLE([lock-debug], + [AC_HELP_STRING([--enable-lock-debug=@<:@no|yes@:>@], + [enable lock debugging (beware of huge overhead) @<:@default=no@:>@])], + [],[enable_lock_debug=no]) +AM_CONDITIONAL([ENABLE_LOCK_DEBUG], test x"$enable_lock_debug" = x"yes") +if test x"$enable_lock_debug" = x"yes"; then + if test "$ac_cv_header_pthread_h" != "yes" ; then + AC_MSG_ERROR([POSIX threads are needed to properly debug locking]) + fi + if test x"$enable_debug" != x"yes" ; then + AC_MSG_ERROR([Cannot debug locking without enabling debugging itself]) + fi + + AC_DEFINE([ENABLE_LOCK_DEBUG], [], [whether lock debugging is enabled]) + LOCK_DEBUG_LDFLAGS="-rdynamic" +fi +AC_SUBST([LOCK_DEBUG_LDFLAGS]) dnl @@ -2601,6 +2619,7 @@ AC_MSG_NOTICE([]) AC_MSG_NOTICE([Miscellaneous]) AC_MSG_NOTICE([]) AC_MSG_NOTICE([ Debug: $enable_debug]) +AC_MSG_NOTICE([ Lock debug: $enable_lock_debug]) AC_MSG_NOTICE([ Use -Werror: $set_werror]) AC_MSG_NOTICE([ Warning Flags: $WARN_CFLAGS]) AC_MSG_NOTICE([ Readline: $lv_use_readline]) diff --git a/src/Makefile.am b/src/Makefile.am index 8fa8680..ef6aaa3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -33,7 +33,8 @@ AM_CFLAGS = $(LIBXML_CFLAGS) \ $(WIN32_EXTRA_CFLAGS) \ $(COVERAGE_CFLAGS) AM_LDFLAGS = $(DRIVER_MODULE_LDFLAGS) \ - $(COVERAGE_LDFLAGS) + $(COVERAGE_LDFLAGS) \ + $(LOCK_DEBUG_LDFLAGS) EXTRA_DIST = $(conf_DATA) util/keymaps.csv diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7790ede..44b4a60 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2022,6 +2022,7 @@ virGetGroupID; virGetGroupList; virGetGroupName; virGetHostname; +virGetLockFunc; virGetUnprivSGIOSysfsPath; virGetUserCacheDirectory; virGetUserConfigDirectory; diff --git a/src/util/virlog.c b/src/util/virlog.c index d1fb0b3..8d1e943 100644 --- a/src/util/virlog.c +++ b/src/util/virlog.c @@ -150,14 +150,22 @@ virMutex virLogMutex; void virLogLock(void) { +#ifdef ENABLE_LOCK_DEBUG + virMutexLockNoLog(&virLogMutex); +#else virMutexLock(&virLogMutex); +#endif } void virLogUnlock(void) { +#ifdef ENABLE_LOCK_DEBUG + virMutexUnlockNoLog(&virLogMutex); +#else virMutexUnlock(&virLogMutex); +#endif } diff --git a/src/util/virthread.h b/src/util/virthread.h index 84d3bdc..6d86227 100644 --- a/src/util/virthread.h +++ b/src/util/virthread.h @@ -88,6 +88,10 @@ void virMutexDestroy(virMutexPtr m); void virMutexLock(virMutexPtr m); void virMutexUnlock(virMutexPtr m); +# ifdef ENABLE_LOCK_DEBUG +void virMutexLockNoLog(virMutexPtr m); +void virMutexUnlockNoLog(virMutexPtr m); +# endif int virCondInit(virCondPtr c) ATTRIBUTE_RETURN_CHECK; diff --git a/src/util/virthreadpthread.c b/src/util/virthreadpthread.c index ca841e4..95e20b5 100644 --- a/src/util/virthreadpthread.c +++ b/src/util/virthreadpthread.c @@ -29,6 +29,11 @@ #include "viralloc.h" +#ifdef ENABLE_LOCK_DEBUG +# include "virutil.h" +# include "virlog.h" +#endif + /* Nothing special required for pthreads */ int virThreadInitialize(void) @@ -81,7 +86,57 @@ void virMutexDestroy(virMutexPtr m) pthread_mutex_destroy(&m->lock); } -void virMutexLock(virMutexPtr m){ +#ifdef ENABLE_LOCK_DEBUG + +# define PRE_LOCK_PROC(mutex) \ + char *frame = virGetLockFrame(); \ + do { \ + if (frame) \ + VIR_DEBUG("lock=%p, frame=%s, state=request", \ + &mutex->lock, NULLSTR(frame)); \ + else \ + VIR_DEBUG("lock=%p, state=request", &mutex->lock); \ + } while (0) + +# define POST_LOCK_PROC(mutex) \ + do { \ + if (frame) { \ + VIR_DEBUG("lock=%p, frame=%s, state=done", \ + &mutex->lock, NULLSTR(frame)); \ + VIR_FREE(frame); \ + } else { \ + VIR_DEBUG("lock=%p, state=done", &mutex->lock); \ + } \ + } while (0) + +void virMutexLock(virMutexPtr m) +{ + PRE_LOCK_PROC(m); + virMutexLockNoLog(m); + POST_LOCK_PROC(m); +} + +void virMutexUnlock(virMutexPtr m) +{ + PRE_LOCK_PROC(m); + virMutexUnlockNoLog(m); + POST_LOCK_PROC(m); +} + +void virMutexLockNoLog(virMutexPtr m) +{ + pthread_mutex_lock(&m->lock); +} + +void virMutexUnlockNoLog(virMutexPtr m) +{ + pthread_mutex_unlock(&m->lock); +} + +#else /* ENABLE_LOCK_DEBUG */ + +void virMutexLock(virMutexPtr m) +{ pthread_mutex_lock(&m->lock); } @@ -90,6 +145,7 @@ void virMutexUnlock(virMutexPtr m) pthread_mutex_unlock(&m->lock); } +#endif /* ENABLE_LOCK_DEBUG */ int virCondInit(virCondPtr c) { diff --git a/src/util/virutil.c b/src/util/virutil.c index 0b54ef7..88b7984 100644 --- a/src/util/virutil.c +++ b/src/util/virutil.c @@ -40,6 +40,7 @@ #include <string.h> #include <termios.h> #include <locale.h> +#include <execinfo.h> #if HAVE_LIBDEVMAPPER_H # include <libdevmapper.h> @@ -2017,3 +2018,42 @@ virCompareLimitUlong(unsigned long long a, unsigned long b) return -1; } + + +/** + * virGetLockFrame: + * + * This function returns possibly the most relevant frame information + * from where a lock function was called. This info is obtained from + * a backtrace while going through the functions and filtering those + * which end with 'Lock'. + * + * We don't want to log anything from this method, not even an OOM + * Error, in case of any problems, just cleanup and return NULL. + */ +char * +virGetLockFrame(void) +{ + char **fnames = NULL; + char *out = NULL; + int i = 0; + int nframes = 0; + void *frames[100] = {0}; + + if (!(nframes = backtrace(frames, ARRAY_CARDINALITY(frames))) || + !(fnames = backtrace_symbols(frames, nframes))) + goto cleanup; + + for (i = 0; i < nframes; i++) { + if (!strstr(fnames[i], "Lock") && + !strstr(fnames[i], "Unlock") && + !strstr(fnames[i], "(+")) { + ignore_value(VIR_STRDUP_QUIET(out, fnames[i])); + break; + } + } + + cleanup: + VIR_FREE(fnames); + return out; +} diff --git a/src/util/virutil.h b/src/util/virutil.h index 0083c88..e385ae6 100644 --- a/src/util/virutil.h +++ b/src/util/virutil.h @@ -169,4 +169,6 @@ char *virFindFCHostCapableVport(const char *sysfs_prefix); int virCompareLimitUlong(unsigned long long a, unsigned long b); +char *virGetLockFrame(void); + #endif /* __VIR_UTIL_H__ */ -- 1.8.3.2

On Mon, Jul 22, 2013 at 03:44:04PM +0200, Martin Kletzander wrote:
With this patch, there is new ./configure option '--enable-lock-debug' which controls whether we want to turn on debugging locks. This feature is exposed as a ./configure option due to its huge overhead in speed/log size and is only meant to be used with this in mind. It is designed in a way that even log from deadlocked daemon should provide enough info to find the deadlock itself. Every matching Lock() call can be matched with its Unlock() call and with every Lock() called it is visible whether it failed or not. Unlock() call follows the same output, even though unnecessary (the call cannot block).
Lock logging is disabled while logging because that would either recurse or deadlock.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
I'm really inclined to say that anyone wanting todo lock debugging should just use systemtap / dtrace todo it, since it is better in every way. You don't need any special compile options to enable it, tracing scripts have little impact on operation of the program being debugged, you can trivially filter data output so that it only reports locking of specific objects, you can trace and synchronize across processes and languages & kernel/userspace. So I don't really think there is any compelling reason to include lock debugging code in libvirt. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 07/22/2013 07:58 AM, Daniel P. Berrange wrote:
I'm really inclined to say that anyone wanting todo lock debugging should just use systemtap / dtrace todo it, since it is better in every way.
I tend to agree on that point, with one caveat - we need to have good documentation on how to do systemtap debugging of libvirt, in order to make it easy to point someone to the steps they need to follow to benefit from it. Our existing web pages on hacking are a bit sparse on this front. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 07/22/2013 06:31 PM, Eric Blake wrote:
On 07/22/2013 07:58 AM, Daniel P. Berrange wrote:
I'm really inclined to say that anyone wanting todo lock debugging should just use systemtap / dtrace todo it, since it is better in every way.
I tend to agree on that point, with one caveat - we need to have good documentation on how to do systemtap debugging of libvirt, in order to make it easy to point someone to the steps they need to follow to benefit from it. Our existing web pages on hacking are a bit sparse on this front.
OK, thanks for your input. My point was to enable this for people who don't want to (or can't) use systemtap for various reasons, but I guess nobody can be *that* lazy to do 'package_app install_command systemtap' and then cleanup afterwards. I like your idea of explaining systemtap debugging with libvirt on wiki/documentation. I'll try to combine it with additional systemtap examples and put it all together somehow. But I need you to check my grammar, since it tends to be a longer text. Thanks again and have a nice day, Martin

On Tue, Jul 23, 2013 at 08:34:35AM +0200, Martin Kletzander wrote:
On 07/22/2013 06:31 PM, Eric Blake wrote:
On 07/22/2013 07:58 AM, Daniel P. Berrange wrote:
I'm really inclined to say that anyone wanting todo lock debugging should just use systemtap / dtrace todo it, since it is better in every way.
I tend to agree on that point, with one caveat - we need to have good documentation on how to do systemtap debugging of libvirt, in order to make it easy to point someone to the steps they need to follow to benefit from it. Our existing web pages on hacking are a bit sparse on this front.
OK, thanks for your input. My point was to enable this for people who don't want to (or can't) use systemtap for various reasons, but I guess nobody can be *that* lazy to do 'package_app install_command systemtap' and then cleanup afterwards.
I like your idea of explaining systemtap debugging with libvirt on wiki/documentation. I'll try to combine it with additional systemtap examples and put it all together somehow. But I need you to check my grammar, since it tends to be a longer text.
Could add an example systemtap script demonstrating how todo it in the examples/systemtap directory too. Let me know if you need any assistance with systemtap. There's a little bit of a learning curve, but usually not as bad as people fear it will be. IME, it is well worth any effect spent learning how to use it, you'll make back the time many times over once you know it. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (3)
-
Daniel P. Berrange
-
Eric Blake
-
Martin Kletzander