Re: [libvirt] [PATCH 4/4] Introduce new OOM testing support

26 Sep 2013

On 09/25/2013 11:23 AM, Daniel P. Berrange wrote:
...
From: "Daniel P. Berrange" <berrange@redhat.com>
The previous OOM testing support would re-run the entire "main"
method each iteration, failing a different malloc each time.
When a test suite has 'n' allocations, the number of repeats
requires is  (n * (n + 1) ) / 2.  This gets very large, very
quickly.
This new OOM testing support instead integrates at the
virtTestRun level, so each individual test case gets repeated,
instead of the entire test suite. This means the values of
'n' are orders of magnitude smaller.
Still O(n^2) - but yes, a smaller n makes for a NOTICEABLE speedup [that
is, 30 alloc's per test on a main() that runs 30 tests is much nicer
than 900 alloc's per main()] :)
...
The simple usage is
$ VIR_TEST_OOM=1 ./qemuxml2argvtest
   ...
   29) QEMU XML-2-ARGV clock-utc                                         ... OK
       Test OOM for nalloc=36 .................................... OK
   30) QEMU XML-2-ARGV clock-localtime                                   ... OK
       Test OOM for nalloc=36 .................................... OK
   31) QEMU XML-2-ARGV clock-france                                      ... OK
       Test OOM for nalloc=38 ...................................... OK
   ...
the second lines reports how many mallocs have to be failed, and thus
how many repeats of the test wil be run.
s/wil/will/
...
If it crashes, then running under valgrind will often show the problem
$ VIR_TEST_OOM=1 ../run valgrind ./qemuxml2argvtest
When debugging problems it is also helpful to select an individual
test case
$ VIR_TEST_RANGE=30 VIR_TEST_OOM=1 ../run valgrind ./qemuxml2argvtest
When things get really tricky, it is possible to request that just
specific allocs are failed. eg to fail allocs 5 -> 12, use
$ VIR_TEST_RANGE=30 VIR_TEST_OOM=1:5-12 ../run valgrind ./qemuxml2argvtest
Please document VIR_TEST_OOM in docs/hacking.html.in (and thus HACKING).
 Does it require building with './configure --enable-test-oom'?
...
In the worse case, you might want to know the stack trace of the
alloc which was failed then VIR_TEST_OOM_TRACE can be set. If it
is set to 1 then it will only print if it things a mistake happened.
s/things/thinks/
...
This is often not reliable, so setting it to 2 will make it print
the stack trace for every alloc that is failed.
$ VIR_TEST_OOM_TRACE=2 VIR_TEST_RANGE=30 VIR_TEST_OOM=1:5-5 ../run valgrind ./qemuxml2argvtest
  30) QEMU XML-2-ARGV clock-localtime                                   ... OK
      Test OOM for nalloc=36 !virAllocN
  /home/berrange/src/virt/libvirt/src/util/viralloc.c:180
  virHashCreateFull
  /home/berrange/src/virt/libvirt/src/util/virhash.c:144
  virDomainDefParseXML
  /home/berrange/src/virt/libvirt/src/conf/domain_conf.c:11745
  virDomainDefParseNode
  /home/berrange/src/virt/libvirt/src/conf/domain_conf.c:12646
  virDomainDefParse
  /home/berrange/src/virt/libvirt/src/conf/domain_conf.c:12590
  testCompareXMLToArgvFiles
  /home/berrange/src/virt/libvirt/tests/qemuxml2argvtest.c:106
  virtTestRun
  /home/berrange/src/virt/libvirt/tests/testutils.c:250
  mymain
  /home/berrange/src/virt/libvirt/tests/qemuxml2argvtest.c:418 (discriminator 2)
  virtTestMain
  /home/berrange/src/virt/libvirt/tests/testutils.c:750
  ??
  ??:0
  _start
  ??:?
   FAILED
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
 tests/cputest.c          |   3 +-
 tests/qemuargv2xmltest.c |  14 ++--
 tests/qemuxml2argvtest.c |  12 ++-
 tests/qemuxmlnstest.c    |  18 +++--
 tests/testutils.c        | 189 ++++++++++++++++++++++++++++++++++++++++++++++-
 tests/testutils.h        |   2 +
 6 files changed, 216 insertions(+), 22 deletions(-)

diff --git a/tests/cputest.c b/tests/cputest.c
index 8e3640b..17cb6af 100644
--- a/tests/cputest.c
+++ b/tests/cputest.c
@@ -476,7 +476,8 @@ cpuTestRun(const char *name, const struct data *data)
         if (virTestGetDebug()) {
             char *log;
             if ((log = virtTestLogContentAndReset()) &&
-                 strlen(log) > 0)
+                log != NULL &&
+                strlen(log) > 0)
Spurious change adding dead code.  You already checked that log was
non-null in the left half of the &&.
...
+++ b/tests/testutils.c
@@ -47,6 +47,9 @@
 #include "virprocess.h"
 #include "virstring.h"
+#include <dlfcn.h>
+#include <execinfo.h>
Needs to be conditional (these are not universal headers; gnulib won't
help you).
...
+#ifdef TEST_OOM
+static unsigned int testOOM = 0;
+static unsigned int testOOMStart = -1;
+static unsigned int testOOMEnd = -1;
+static unsigned int testOOMTrace = 0;
+# ifdef TEST_OOM_TRACE
Why the two conditions?  Or is TEST_OOM_TRACE based on whether
<execinfo.h> is present?
...
+virTestShowTrace(void)
+{
+    size_t j;
+    for (j = 2; j < ntestAllocStack; j++) {
+        Dl_info info;
+        char *cmd;
+
+        dladdr(testAllocStack[j], &info);
+        if (info.dli_fname &&
+            strstr(info.dli_fname, ".so")) {
+            if (virAsprintf(&cmd, "addr2line -f -e %s %p",
+                            info.dli_fname,
+                            ((void*)((unsigned long long)testAllocStack[j]
+                                     - (unsigned long long)info.dli_fbase))) < 0)
+                continue;
+        } else {
+            if (virAsprintf(&cmd, "addr2line -f -e %s %p",
+                            (char*)(info.dli_fname ? info.dli_fname : "<unknown>"),
+                            testAllocStack[j]) < 0)
+                continue;
+        }
+        ignore_value(system(cmd));
configure.ac defined TEST_OOM_TRACE solely based on whether
'backtrace()' exists; but as you are also using addr2line and looking
for .so files, which starts to feel specific to Linux, is it more
conservative to disable this option on non-Linux?
...
@@ -168,7 +226,7 @@ virtTestRun(const char *title,
             !((testCounter-1) % 40)) {
             fprintf(stderr, " %-3zu\n", (testCounter-1));
             fprintf(stderr, "      ");
-            }
+        }
Missed in patch 3/4?
...
+        testOOMActive = true;
+        for (i = start; i < end; i++) {
+            bool missingFail = false;
+# ifdef TEST_OOM_TRACE
+            memset(testAllocStack, 0, ARRAY_CARDINALITY(testAllocStack));
+            ntestAllocStack = 0;
+# endif
+            virAllocTestOOM(i+1, 1);
spaces around +
...
+            oomret = body(data);
+
+            /* fprintf() disabled because XML parsing APIs don't allow
+             * distinguish between element / attribute not present
+             * in the XML (which is non-fatal), vs OOM / malformed
+             * which should be fatal. Thus error reporting for
+             * optionally present XML is mostly broken.
+             */
+            if (oomret == 0) {
+                missingFail = true;
+# if 0
+                fprintf(stderr, " alloc %zu failed but no err status\n", i + 1);
+# endif
+            } else {
+                virErrorPtr lerr = virGetLastError();
+                if (!lerr) {
+# if 0
+                    fprintf(stderr, " alloc %zu failed but no error report\n", i + 1);
+# endif
+                    missingFail = true;
+                }
+            }
+            if ((missingFail && testOOMTrace) || (testOOMTrace > 1)) {
Safe to drop () around 'testOOMTrace > 1'
...
+                fprintf(stderr, "%s", "!");
+# ifdef TEST_OOM_TRACE
+                virTestShowTrace();
+# endif
+                ret = -1;
+            } else {
+                fprintf(stderr, "%s", ".");
+            }
+        }
+        testOOMActive = false;
+        if (ret == 0)
+            fprintf(stderr, " OK\n");
+        else
+            fprintf(stderr, " FAILED\n");
+        virAllocTestInit();
+    }
+#endif /* TEST_OOM */
+
     return ret;
 }
@@ -205,7 +334,7 @@ virtTestLoadFile(const char *file, char **buf)
     tmplen = buflen = st.st_size + 1;
if (VIR_ALLOC_N(*buf, buflen) < 0) {
-        fprintf(stderr, "%s: larger than available memory (> %d)\n", file, buflen);
+        //fprintf(stderr, "%s: larger than available memory (> %d)\n", file, buflen);
Is this hunk intentional?
...
VIR_FORCE_FCLOSE(fp);
         return -1;
     }
@@ -481,7 +610,8 @@ virtTestLogOutput(virLogSource source ATTRIBUTE_UNUSED,
 {
     struct virtTestLogData *log = data;
     virCheckFlags(VIR_LOG_STACK_TRACE,);
-    virBufferAsprintf(&log->buf, "%s: %s", timestamp, str);
+    if (!testOOMActive)
+        virBufferAsprintf(&log->buf, "%s: %s", timestamp, str);
 }
static void
@@ -550,6 +680,9 @@ int virtTestMain(int argc,
     int ret;
     bool abs_srcdir_cleanup = false;
     char *testRange = NULL;
+#ifdef TEST_OOM
+    char *oomstr;
+#endif
abs_srcdir = getenv("abs_srcdir");
     if (!abs_srcdir) {
@@ -610,6 +743,56 @@ int virtTestMain(int argc,
         }
     }
+#ifdef TEST_OOM
+    if ((oomstr = getenv("VIR_TEST_OOM")) != NULL) {
+        char *next;
+        if (testDebug == -1)
+            testDebug = 1;
+        testOOM = 1;
+        if (oomstr[0] != '\0' &&
+            oomstr[1] == ':') {
+            if (virStrToLong_ui(oomstr + 2, &next, 10, &testOOMStart) < 0) {
+                fprintf(stderr, "Cannot parse range %s\n", oomstr);
+                return EXIT_FAILURE;
+            }
+            if (*next != '-') {
+                fprintf(stderr, "Cannot parse range %s\n", oomstr);
+                return EXIT_FAILURE;
+            }
Worth accepting VIR_TEST_OOM=1:1 as shorthand for 1:1-1?

I like where it's headed.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org