Cache the results of clang-tidy. The cache is keyed, for each file, on:
* the file name,
* the exact command used to compile the file, to detect changes in arguments,
* the hash of the preprocessor stage output, to detect changes in includes.
A later patch also adds the list of enabled checks to the cache key.
Running clang-tidy uncached takes between 95 and 110 minutes of
single-threaded CPU time (just over 9 minutes wall time on a 12-core
builder), depending on the set of enabled checks. In the ideal case,
where no source file has changed, enabling the cache reduces this to
80 seconds (9 seconds on a 12-core builder).
This makes clang-tidy much more pleasant to work with locally, but it
is not enough to guarantee painless CI operation: while GitLab does
support caching between builds and can be configured to retain the
cache even when a job fails, the cache is not retained when the job
times out after 60 minutes or is aborted manually.
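An example invocation (the cache location is illustrative; all other
arguments are passed as before):

  $ scripts/run-clang-tidy.py --cache ~/.cache/clang-tidy ...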
Signed-off-by: Tim Wiederhake <twiederh(a)redhat.com>
---
scripts/run-clang-tidy.py | 83 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 2 deletions(-)
diff --git a/scripts/run-clang-tidy.py b/scripts/run-clang-tidy.py
index dc5880878b..cc9c20ea32 100755
--- a/scripts/run-clang-tidy.py
+++ b/scripts/run-clang-tidy.py
@@ -1,14 +1,17 @@
#!/usr/bin/env python3
import argparse
+import hashlib
import json
import multiprocessing
import os
import queue
import re
+import shlex
import subprocess
import sys
import threading
+import time
spam = [
@@ -44,6 +47,10 @@ def parse_args():
default=multiprocessing.cpu_count(),
type=int,
help="Number of threads to run")
+ parser.add_argument(
+ "--cache",
+ dest="cache",
+ help="Path to cache directory")
return parser.parse_args()
@@ -67,14 +74,75 @@ def run_clang_tidy(item):
}
+def cache_name(item):
+ if not args.cache:
+ return None
+
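+    # Build a preprocessor-only variant of the compile command: point the
+    # output at /dev/stdout and drop the dependency-generation flags
+    # (-MD, -MQ, -MF), which do not affect the preprocessed source.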
+ cmd = shlex.split(item["command"])
+ for index, element in enumerate(cmd):
+ if element == "-o":
+ cmd[index + 1] = "/dev/stdout"
+ continue
+ if element == "-MD":
+ cmd[index] = None
+ if element in ("-MQ", "-MF"):
+ cmd[index] = None
+ cmd[index + 1] = None
+ cmd = [c for c in cmd if c is not None]
+ cmd.append("-E")
+
+ result = subprocess.run(
+ cmd,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+
+ if result.returncode != 0:
+ return None
+
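+    # The cache key combines the exact compile command with the
+    # preprocessed output, so changing the arguments or any included
+    # file produces a different cache file name.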
+ hashsum = hashlib.sha256()
+ hashsum.update(item["command"].encode())
+ hashsum.update(result.stdout.encode())
+
+ basename = "".join([c if c.isalnum() else "_" for c in
item["output"]])
+ return os.path.join(args.cache, "%s-%s" % (basename, hashsum.hexdigest()))
+
+
+def cache_read(filename):
+ if filename is None:
+ return None
+
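+    # Treat a missing or unparsable cache file as a cache miss.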
+ try:
+ with open(filename) as f:
+ return json.load(f)
+ except FileNotFoundError:
+ pass
+ except json.decoder.JSONDecodeError:
+ pass
+ return None
+
+
+def cache_write(filename, result):
+ if filename is None:
+ return
+
+ with open(filename, "w") as f:
+ json.dump(result, f)
+
+
def worker():
while True:
item = items.get()
os.chdir(item["directory"])
-        print(item["file"])
-        result = run_clang_tidy(item)
+        cache = cache_name(item)
+        result = cache_read(cache)
+        with lock:
+            print(item["file"], "" if result is None else "(from cache)")
+
+        if result is None:
+            result = run_clang_tidy(item)
+            cache_write(cache, result)
with lock:
if result["returncode"] != 0:
@@ -92,6 +160,10 @@ items = queue.Queue()
lock = threading.Lock()
findings = list()
+if args.cache:
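+    # Resolve the cache path up front: the worker threads chdir() into
+    # each item's build directory, which would break a relative path.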
+ args.cache = os.path.abspath(args.cache)
+ os.makedirs(args.cache, exist_ok=True)
+
for _ in range(args.thread_num):
threading.Thread(target=worker, daemon=True).start()
@@ -102,6 +174,13 @@ with open(os.path.join(args.build_dir, "compile_commands.json")) as f:
items.join()
+if args.cache:
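+    # Remove cache files untouched for more than seven days so the
+    # cache directory does not grow without bound.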
+ cutoffdate = time.time() - 7 * 24 * 60 * 60
+ for filename in os.listdir(args.cache):
+ pathname = os.path.join(args.cache, filename)
+ if os.path.getmtime(pathname) < cutoffdate:
+ os.remove(pathname)
+
if findings:
print("Findings in %s file(s):" % len(findings))
for finding in findings:
--
2.26.2