As discussed before, this simple script should help with debugging
deadlocks, although there are still some caveats. RWLocks are not
handled by this and if your deadlock is very racy, it may not lock
up when running with this script due to the slowdown.
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
examples/systemtap/lock-debug.stp | 118 ++++++++++++++++++++++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100644 examples/systemtap/lock-debug.stp
diff --git a/examples/systemtap/lock-debug.stp b/examples/systemtap/lock-debug.stp
new file mode 100644
index 0000000..8f06340
--- /dev/null
+++ b/examples/systemtap/lock-debug.stp
@@ -0,0 +1,118 @@
+#!/usr/bin/stap --ldd -d /usr/sbin/libvirtd -c libvirtd
+#
+# Usage with installed libvirt daemon:
+# stap --ldd -d /usr/sbin/libvirtd -c libvirtd \
+# lock-debug.stp /usr/lib/libvirt.so
+#
+# If made executable, a simple './lock-debug.stp' should work too.
+#
+# TODOs:
+#
+# Document usage with uninstalled daemon and libs. Assuming CWD is toplevel
+# source git directory, it should be only slight modification to the following:
+#
+# ./run stap --ldd -c daemon/libvirtd -d daemon/libvirtd
+# examples/systemtap/lock-debug.stp src/.libs/libvirt.so
+#
+# Debug RWLock mechanisms as well.
+#
+# Author: Martin Kletzander <mkletzan(a)redhat.com>
+
+
+global mx_tolock  # [lock, tid] -> backtrace stored on entry to virMutexLock, until that call returns
+global mx_locked  # [lock, tid] -> the same backtrace, moved here on return and removed in unlock.call
+
+
+function filter()  # 0 when the event belongs to the target() process, 1 otherwise
+{
+    if (pid() == target())
+        return 0
+
+    return 1
+}
+
+probe library = process( %( $# > 0 %? @1 %: "/usr/lib/libvirt.so" %) )  # first script argument, if given, replaces the default path
+{
+ if (filter()) next  # filter() returns nonzero for processes other than the target
+}
+
+probe lock = library.function("virMutexLock")  # alias used by lock.call and lock.return
+{
+ lockname = usymdata($m)  # key for the mutex: usymdata() string for the address in target variable m
+}
+
+probe unlock = library.function("virMutexUnlock")  # alias used by unlock.call
+{
+ lockname = usymdata($m)  # same key derivation as in the lock alias, so entries can be matched
+}
+
+probe begin  # startup: refuse more than one script argument, otherwise announce readiness
+{
+    %( $# > 1 %? println("error: Too many parameters"); exit();
+    %: print("Started, press ^C when the process hangs\n"); %)
+}
+
+probe lock.call  # entry to virMutexLock
+{
+ mx_tolock[lockname, tid()] = sprint_usyms(ubacktrace())  # remember where this thread started waiting for the lock
+}
+
+probe lock.return  # virMutexLock returned
+{
+ if ([lockname, tid()] in mx_tolock) {  # a matching lock.call was recorded for this thread
+ mx_locked[lockname, tid()] = mx_tolock[lockname, tid()]  # move the entry from pending to held, keeping the call-site backtrace
+ delete mx_tolock[lockname, tid()]
+ } else {  # return observed without a recorded call for this lock and thread
+ printf("internal error: lock acquired unwillingly?\n")
+ }
+}
+
+probe unlock.call  # entry to virMutexUnlock: drop the held-lock record for this mutex
+{
+    found = 0
+    # Find the thread recorded as holding this lock (first matching entry wins).
+    foreach ([lock, tid] in mx_locked) {
+        if (lock != lockname)
+            continue
+        if (tid != tid()) {
+            printf("Warning: lock released on different thread that locked it.\n")
+            printf("Lock trace:\n%s\n", mx_locked[lock, tid])
+            printf("Unlock trace:\n%s\n", sprint_usyms(ubacktrace()))
+        }
+        # Only remember the owner here; the entry itself is deleted after the loop.
+        found = tid
+        break
+    }
+
+    if (found && [lockname, found] in mx_locked)
+        delete mx_locked[lockname, found]
+}
+
+probe end  # on exit, report every lock that is recorded as held while another acquisition is pending
+{
+    tmp = 0
+
+    printf("\n=============\n")
+    # Outer loop: pending acquisitions (mx_tolock); inner loop: held locks (mx_locked).
+    foreach (bt1 = [lock1, tid1] in mx_tolock) {
+        deadlock = 0
+
+        foreach (bt2 = [lock2, tid2] in mx_locked) {
+            if (lock1 == lock2) {
+                if (!tmp++)
+                    printf("The following locks cannot be acquired:\n")
+                # Holder (tid2, bt2) is printed once, followed by the waiter (tid1, bt1).
+                if (!deadlock++)
+                    printf("Lock %s was locked in thread %d with this trace:\n%s\n",
+                           lock1, tid2, bt2)
+
+                printf("and is waiting to be locked by thread %d here:\n%s\n",
+                       tid1, bt1)
+            }
+        }
+        if (deadlock)
+            printf("---\n")
+    }
+    if (!tmp)
+        printf("No deadlocks found, sorry.\n")
+}
--
2.1.2
Show replies by date
On 29.10.2014 14:44, Martin Kletzander wrote:
As discussed before, this simple script should help with debugging
deadlocks, although there are still some caveats. RWLocks are not
handled by this and if your deadlock if very racy, it may not lock
up when running with this script due to the slowdown.
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
examples/systemtap/lock-debug.stp | 118 ++++++++++++++++++++++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100644 examples/systemtap/lock-debug.stp
ACK. Cool idea.
Michal