This is a wrapper for codespell [1], a spell checker for source code.
Codespell does not compare words to a dictionary, but rather works by
checking words against a list of common typos, making it produce fewer
false positives than other solutions.
The script in this patch works around the lack of per-directory ignore
lists and some oddities regarding capitalization in ignore lists.
[1] https://github.com/codespell-project/codespell/
RFC:
Is there interest in having something like this in CI?
Examples of spelling mistakes that were found using codespell:
4ad3c95f4bef5c7c9657de470fb74a4d14c8a331,
785a11cec8693de7df024aae68975dd1799b646a,
1452317b5c727eb17178942012f57f0c37631ae4.
Signed-off-by: Tim Wiederhake <twiederh(a)redhat.com>
---
scripts/check-spelling.py | 115 ++++++++++++++++++++++++++++++++++++++
1 file changed, 115 insertions(+)
create mode 100755 scripts/check-spelling.py
diff --git a/scripts/check-spelling.py b/scripts/check-spelling.py
new file mode 100755
index 0000000000..01371c0d1e
--- /dev/null
+++ b/scripts/check-spelling.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+
+# Findings that are known to be false positives.
+#
+# Each entry is a tuple (path prefix, words). A finding is ignored when its
+# filename starts with the path prefix and the reported word is in the word
+# list. An empty word list ignores every finding below that prefix.
+#
+# The words are always given as a list, even for a single word: with a bare
+# string the "in" operator would perform a substring test instead of an
+# exact match.
+IGNORE_LIST = [
+    # ignore all translation files
+    ("/po/", []),
+
+    # ignore this script
+    ("/scripts/check-spelling.py", []),
+
+    # 3rd-party: keycodemapdb
+    ("/src/keycodemapdb/", []),
+
+    # 3rd-party: VirtualBox SDK
+    ("/src/vbox/vbox_CAPI", [
+        "aAdd",
+        "aCount",
+        "aLocation",
+        "aNumber",
+        "aParent",
+        "progess"]),
+
+    # 3rd-party: qemu
+    ("/tests/qemucapabilitiesdata/caps_", ["encyption"]),
+
+    # other
+    ("/", ["msdos", "MSDOS", "wan", "WAN", "hda", "HDA", "inout"]),
+    ("/NEWS.rst", ["crashers"]),
+    ("/docs/gitdm/companies/others", ["Archiv"]),
+    ("/docs/glib-adoption.rst", ["preferrable"]),
+    ("/docs/js/main.js", ["whats"]),
+    ("/examples/polkit/libvirt-acl.rules", ["userA", "userB", "userC"]),
+    ("/src/libvirt-domain.c", ["PTD"]),
+    ("/src/libxl/libxl_logger.c", ["purposedly"]),
+    ("/src/nwfilter/nwfilter_dhcpsnoop.c", ["ether"]),
+    ("/src/nwfilter/nwfilter_ebiptables_driver.c", ["parm"]),
+    ("/src/nwfilter/nwfilter_learnipaddr.c", ["ether"]),
+    ("/src/qemu/qemu_agent.c", ["crypted"]),
+    ("/src/qemu/qemu_agent.h", ["crypted"]),
+    ("/src/security/apparmor/libvirt-lxc", ["devic"]),
+    ("/src/security/apparmor/libvirt-qemu", ["readby"]),
+    ("/src/storage_file/storage_file_probe.c", ["conectix"]),
+    ("/src/util/virnetdevmacvlan.c", ["calld"]),
+    ("/src/util/virtpm.c", ["parm"]),
+    ("/tests/qemuagenttest.c", ["IST"]),
+    ("/tests/storagepoolxml2xml", ["cant"]),
+    ("/tests/sysinfodata/", ["sie"]),
+    ("/tests/testutils.c", ["nIn"]),
+    ("/tests/vircgroupdata/ovirt-node-6.6.mounts", ["hald"]),
+    ("/tests/virhostcpudata/", ["sie"]),
+    ("/tools/virt-host-validate-common.c", ["sie"]),
+]
+
+
+def check_spelling(directory):
+    """Run codespell on ``directory`` and yield the findings.
+
+    This is a generator. Each finding is yielded as a tuple of strings
+    (filename, line number, word, suggestion), where a leading
+    ``directory`` has been removed from the filename.
+
+    The script exits with an error message if codespell cannot be started,
+    returns a status other than 0 or 65, writes anything to stderr, or
+    prints a line that does not match the expected format.
+    """
+    try:
+        process = subprocess.run(
+            ["codespell", directory],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True)
+    except FileNotFoundError:
+        # Report a missing tool like every other failure instead of
+        # showing a traceback.
+        sys.exit("error: codespell executable not found")
+
+    if process.returncode not in (0, 65):
+        sys.exit("error: unexpected returncode %s" % process.returncode)
+
+    if process.stderr:
+        sys.exit(
+            "error: unexpected output to stderr: \"%s\"" % process.stderr)
+
+    line_pattern = re.compile("^(.*):(.*): (.*) ==> (.*)$")
+    for line in process.stdout.split("\n"):
+        line = line.strip()
+        if not line:
+            continue
+        # Only strip the directory where it prefixes the filename; a plain
+        # replace() would also remove later occurrences within the line.
+        if line.startswith(directory):
+            line = line[len(directory):]
+        match = line_pattern.match(line)
+        if not match:
+            sys.exit("error: unexpected line: \"%s\"" % line)
+        yield match.groups()
+
+
+def ignore(filename, linenumber, word, suggestion, *, ignore_list=None):
+    """Tell whether a finding should be suppressed.
+
+    A finding is suppressed if the word is at most two characters long, or
+    if an ignore list entry whose path prefix matches ``filename`` either
+    lists ``word`` or has an empty word list. ``linenumber`` and
+    ``suggestion`` are accepted so that a finding tuple can be passed as
+    is; they do not influence the result.
+
+    ``ignore_list`` is a list of (path prefix, words) tuples and defaults
+    to IGNORE_LIST. ``words`` may be a list of words or a single word
+    given as a string.
+    """
+    # Ignore abbreviations and ad-hoc variable names
+    if len(word) <= 2:
+        return True
+
+    if ignore_list is None:
+        ignore_list = IGNORE_LIST
+
+    for prefix, words in ignore_list:
+        if not filename.startswith(prefix):
+            continue
+        if isinstance(words, str):
+            # A bare string names exactly one word. Using "in" on the
+            # string itself would be a substring test and would also
+            # accept e.g. "the" for the entry "ether".
+            words = [words]
+        if not words or word in words:
+            return True
+    return False
+
+
+def main():
+    """Check the spelling of a source directory given on the command line.
+
+    Every finding that is not ignored is printed in the form of an
+    IGNORE_LIST entry, followed by a comment with the line number and the
+    suggested spelling. The script exits with an error message if there is
+    at least one such finding.
+    """
+    parser = argparse.ArgumentParser(description="Check spelling")
+    parser.add_argument(
+        "dir",
+        help="Path to source directory",
+        type=os.path.realpath)
+    args = parser.parse_args()
+
+    findings = [f for f in check_spelling(args.dir) if not ignore(*f)]
+    if findings:
+        template = "(\"{0}\", \"{2}\"),\t# line {1}, \"{3}\"?"
+        for finding in findings:
+            print(template.format(*finding))
+        # sys.exit() rather than the exit() helper, which is only present
+        # when the site module has been loaded.
+        sys.exit("error: %s spelling errors" % len(findings))
+
+
+# Run the check only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--
2.31.1