As part of a goal to eliminate Perl from libvirt build tools,
rewrite the check-spacing.pl tool in Python.
This was a straight conversion, manually going line-by-line to
change the syntax from Perl to Python. Thus the overall structure
of the file and the approach are the same.
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
Makefile.am | 2 +-
build-aux/check-spacing.pl | 198 --------------------------------
build-aux/syntax-check.mk | 4 +-
scripts/check-spacing.py | 229 +++++++++++++++++++++++++++++++++++++
4 files changed, 232 insertions(+), 201 deletions(-)
delete mode 100755 build-aux/check-spacing.pl
create mode 100755 scripts/check-spacing.py
diff --git a/Makefile.am b/Makefile.am
index 9471cf7117..5187ca6cc2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -46,7 +46,7 @@ EXTRA_DIST = \
README.md \
AUTHORS.in \
scripts/augeas-gentest.py \
- build-aux/check-spacing.pl \
+ scripts/check-spacing.py \
build-aux/header-ifdef.pl \
scripts/minimize-po.py \
build-aux/mock-noinline.pl \
diff --git a/build-aux/check-spacing.pl b/build-aux/check-spacing.pl
deleted file mode 100755
index 33377f3dd3..0000000000
--- a/build-aux/check-spacing.pl
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env perl
-#
-# check-spacing.pl: Report any usage of 'function (..args..)'
-# Also check for other syntax issues, such as correct use of ';'
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library. If not, see
-# <http://www.gnu.org/licenses/>.
-
-use strict;
-use warnings;
-
-my $ret = 0;
-my $incomment = 0;
-
-foreach my $file (@ARGV) {
- # Per-file variables for multiline Curly Bracket (cb_) check
- my $cb_linenum = 0;
- my $cb_code = "";
- my $cb_scolon = 0;
-
- open FILE, $file;
-
- while (defined (my $line = <FILE>)) {
- my $data = $line;
- # For temporary modifications
- my $tmpdata;
-
- # Kill any quoted , ; = or "
- $data =~ s/'[";,=]'/'X'/g;
-
- # Kill any quoted strings
- $data =~ s,"(?:[^\\\"]|\\.)*","XXX",g;
-
- next if $data =~ /^#/;
-
- # Kill contents of multi-line comments
- # and detect end of multi-line comments
- if ($incomment) {
- if ($data =~ m,\*/,) {
- $incomment = 0;
- $data =~ s,^.*\*/,*/,;
- } else {
- $data = "";
- }
- }
-
- # Kill single line comments, and detect
- # start of multi-line comments
- if ($data =~ m,/\*.*\*/,) {
- $data =~ s,/\*.*\*/,/* */,;
- } elsif ($data =~ m,/\*,) {
- $incomment = 1;
- $data =~ s,/\*.*,/*,;
- }
-
- # We need to match things like
- #
- # int foo (int bar, bool wizz);
- # foo (bar, wizz);
- #
- # but not match things like:
- #
- # typedef int (*foo)(bar wizz)
- #
- # we can't do this (efficiently) without
- # missing things like
- #
- # foo (*bar, wizz);
- #
- # We also don't want to spoil the $data so it can be used
- # later on.
- $tmpdata = $data;
- while ($tmpdata =~ /(\w+)\s\((?!\*)/) {
- my $kw = $1;
-
- # Allow space after keywords only
- if ($kw =~ /^(?:if|for|while|switch|return)$/) {
- $tmpdata =~ s/(?:$kw\s\()/XXX(/;
- } else {
- print "Whitespace after non-keyword:\n";
- print "$file:$.: $line";
- $ret = 1;
- last;
- }
- }
-
- # Require whitespace immediately after keywords
- if ($data =~ /\b(?:if|for|while|switch|return)\(/) {
- print "No whitespace after keyword:\n";
- print "$file:$.: $line";
- $ret = 1;
- }
-
- # Forbid whitespace between )( of a function typedef
- if ($data =~ /\(\*\w+\)\s+\(/) {
- print "Whitespace between ')' and '(':\n";
- print "$file:$.: $line";
- $ret = 1;
- }
-
- # Forbid whitespace following ( or prior to )
- # but allow whitespace before ) on a single line
- # (optionally followed by a semicolon)
- if (($data =~ /\s\)/ && not $data =~ /^\s+\);?$/) ||
- $data =~ /\((?!$)\s/) {
- print "Whitespace after '(' or before ')':\n";
- print "$file:$.: $line";
- $ret = 1;
- }
-
- # Forbid whitespace before ";" or ",". Things like below are allowed:
- #
- # 1) The expression is empty for "for" loop. E.g.
- # for (i = 0; ; i++)
- #
- # 2) An empty statement. E.g.
- # while (write(statuswrite, &status, 1) == -1 &&
- # errno == EINTR)
- # ;
- #
- if ($data =~ /\s[;,]/) {
- unless ($data =~ /\S; ; / ||
- $data =~ /^\s+;/) {
- print "Whitespace before semicolon or comma:\n";
- print "$file:$.: $line";
- $ret = 1;
- }
- }
-
- # Require EOL, macro line continuation, or whitespace after ";".
- # Allow "for (;;)" as an exception.
- if ($data =~ /;[^ \\\n;)]/) {
- print "Invalid character after semicolon:\n";
- print "$file:$.: $line";
- $ret = 1;
- }
-
- # Require EOL, space, or enum/struct end after comma.
- if ($data =~ /,[^ \\\n)}]/) {
- print "Invalid character after comma:\n";
- print "$file:$.: $line";
- $ret = 1;
- }
-
- # Require spaces around assignment '=', compounds and '=='
- if ($data =~ /[^ ]\b[!<>&|\-+*\/%\^=]?=/ ||
- $data =~ /=[^= \\\n]/) {
- print "Spacing around '=' or '==':\n";
- print "$file:$.: $line";
- $ret = 1;
- }
-
- # One line conditional statements with one line bodies should
- # not use curly brackets.
- if ($data =~ /^\s*(if|while|for)\b.*\{$/) {
- $cb_linenum = $.;
- $cb_code = $line;
- $cb_scolon = 0;
- }
-
- # We need to check for exactly one semicolon inside the body,
- # because empty statements (e.g. with comment only) are
- # allowed
- if ($cb_linenum == $. - 1 && $data =~ /^[^;]*;[^;]*$/) {
- $cb_code .= $line;
- $cb_scolon = 1;
- }
-
- if ($data =~ /^\s*}\s*$/ &&
- $cb_linenum == $. - 2 &&
- $cb_scolon) {
-
- print "Curly brackets around single-line body:\n";
- print "$file:$cb_linenum-$.:\n$cb_code$line";
- $ret = 1;
-
- # There _should_ be no need to reset the values; but to
- # keep my inner peace...
- $cb_linenum = 0;
- $cb_scolon = 0;
- $cb_code = "";
- }
- }
- close FILE;
-}
-
-exit $ret;
diff --git a/build-aux/syntax-check.mk b/build-aux/syntax-check.mk
index 9b6c157029..d308896b26 100644
--- a/build-aux/syntax-check.mk
+++ b/build-aux/syntax-check.mk
@@ -2157,8 +2157,8 @@ prohibit-duplicate-header:
$(PYTHON) $(top_srcdir)/scripts/prohibit-duplicate-header.py
spacing-check:
- $(AM_V_GEN)$(VC_LIST) | $(GREP) '\.c$$' | xargs \
- $(PERL) $(top_srcdir)/build-aux/check-spacing.pl || \
+ $(AM_V_GEN)$(VC_LIST) | $(GREP) '\.c$$' | $(RUNUTF8) xargs \
+ $(PYTHON) $(top_srcdir)/scripts/check-spacing.py || \
{ echo '$(ME): incorrect formatting' 1>&2; exit 1; }
mock-noinline:
diff --git a/scripts/check-spacing.py b/scripts/check-spacing.py
new file mode 100755
index 0000000000..6b9f3ec1ba
--- /dev/null
+++ b/scripts/check-spacing.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2012-2019 Red Hat, Inc.
+#
+# check-spacing.py: Report any usage of 'function (..args..)'
+# Also check for other syntax issues, such as correct use of ';'
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see
+# <http://www.gnu.org/licenses/>.
+
+from __future__ import print_function
+
+import re
+import sys
+
+
+def check_whitespace(filename):
+ errs = False
+ with open(filename, 'r') as fh:
+ quotedmetaprog = re.compile(r"""'[";,=]'""")
+ quotedstringprog = re.compile(r'''"(?:[^\\\"]|\\.)*"''')
+ commentstartprog = re.compile(r'''^(.*)/\*.*$''')
+ commentendprog = re.compile(r'''^.*\*/(.*)$''')
+ commentprog = re.compile(r'''^(.*)/\*.*\*/(.*)''')
+ funcprog = re.compile(r'''(\w+)\s\((?!\*)''')
+ keywordprog = re.compile(
+ r'''^.*\b(?:if|for|while|switch|return)\(.*$''')
+ functypedefprog = re.compile(r'''^.*\(\*\w+\)\s+\(.*$''')
+ whitespaceprog1 = re.compile(r'''^.*\s\).*$''')
+ whitespaceprog2 = re.compile(r'''^\s+\);?$''')
+ whitespaceprog3 = re.compile(r'''^.*\((?!$)\s.*''')
+ commasemiprog1 = re.compile(r'''.*\s[;,].*''')
+ commasemiprog2 = re.compile(r'''.*\S; ; .*''')
+ commasemiprog3 = re.compile(r'''^\s+;''')
+ semicolonprog = re.compile(r'''.*;[^ \\\n;)].*''')
+ commaprog = re.compile(r'''.*,[^ \\\n)}].*''')
+ assignprog1 = re.compile(r'''[^ ]\b[!<>&|\-+*\/%\^=]?=''')
+ assignprog2 = re.compile(r'''=[^= \\\n]''')
+ condstartprog = re.compile(r'''^\s*(if|while|for)\b.*\{$''')
+ statementprog = re.compile(r'''^[^;]*;[^;]*$''')
+ condendprog = re.compile(r'''^\s*}\s*$''')
+
+ incomment = False
+ # Per-file variables for multiline Curly Bracket (cb_) check
+ cb_lineno = 0
+ cb_code = ""
+ cb_scolon = False
+
+ lineno = 0
+ for line in fh:
+ lineno = lineno + 1
+ data = line
+ # For temporary modifications
+
+ # Kill any quoted , ; = or "
+ data = quotedmetaprog.sub("'X'", data)
+
+ # Kill any quoted strings
+ data = quotedstringprog.sub('"XXX"', data)
+
+ if data[0] == '#':
+ continue
+
+ # Kill contents of multi-line comments
+ # and detect end of multi-line comments
+ if incomment:
+ if commentendprog.match(data):
+ data = commentendprog.sub('*/\2', data)
+ incomment = False
+ else:
+ data = ""
+
+ # Kill single line comments, and detect
+ # start of multi-line comments
+ if commentprog.match(data):
+ data = commentprog.sub(r'''\1/* */\2''', data)
+ elif commentstartprog.match(data):
+ data = commentstartprog.sub(r'''\1/*''', data)
+ incomment = True
+
+ # We need to match things like
+ #
+ # int foo (int bar, bool wizz);
+ # foo (bar, wizz);
+ #
+ # but not match things like:
+ #
+ # typedef int (*foo)(bar wizz)
+ #
+ # we can't do this (efficiently) without
+ # missing things like
+ #
+ # foo (*bar, wizz);
+ #
+ for match in funcprog.finditer(data):
+ kw = match.group(1)
+
+ # Allow space after keywords only
+ if kw not in ["if", "for", "while", "switch", "return"]:
+ print("Whitespace after non-keyword:",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+ break
+
+ # Require whitespace immediately after keywords
+ if keywordprog.match(data):
+ print("No whitespace after keyword:",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # Forbid whitespace between )( of a function typedef
+ if functypedefprog.match(data):
+ print("Whitespace between ')' and '(':",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # Forbid whitespace following ( or prior to )
+ # but allow whitespace before ) on a single line
+ # (optionally followed by a semicolon)
+ if ((whitespaceprog1.match(data) and
+ not whitespaceprog2.match(data))
+ or whitespaceprog3.match(data)):
+ print("Whitespace after '(' or before ')':",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # Forbid whitespace before ";" or ",". Things like
+ # below are allowed:
+ #
+ # 1) The expression is empty for "for" loop. E.g.
+ # for (i = 0; ; i++)
+ #
+ # 2) An empty statement. E.g.
+ # while (write(statuswrite, &status, 1) == -1 &&
+ # errno == EINTR)
+ # ;
+ #
+ if commasemiprog1.match(data) and not (
+ commasemiprog2.match(data) or
+ commasemiprog3.match(data)):
+ print("Whitespace before semicolon or comma:",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # Require EOL, macro line continuation, or whitespace after ";".
+ # Allow "for (;;)" as an exception.
+ if semicolonprog.match(data):
+ print("Invalid character after semicolon:",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # Require EOL, space, or enum/struct end after comma.
+ if commaprog.match(data):
+ print("Invalid character after comma:",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # Require spaces around assignment '=', compounds and '=='
+ if assignprog1.match(data) or assignprog2.match(data):
+ print("Spacing around '=' or '==':",
+ file=sys.stderr)
+ print("%s:%d: %s" % (filename, lineno, line),
+ file=sys.stderr)
+ errs = True
+
+ # One line conditional statements with one line bodies should
+ # not use curly brackets.
+ if condstartprog.match(data):
+ cb_lineno = lineno
+ cb_code = line
+ cb_scolon = False
+
+ # We need to check for exactly one semicolon inside the body,
+ # because empty statements (e.g. with comment only) are
+ # allowed
+ if (cb_lineno == lineno - 1) and statementprog.match(data):
+ cb_code = cb_code + line
+ cb_scolon = True
+
+ if (condendprog.match(data) and
+ (cb_lineno == lineno - 2) and
+ cb_scolon):
+ print("Curly brackets around single-line body:",
+ file=sys.stderr)
+ print("%s:%d:\n%s%s" % (filename, cb_lineno - lineno,
+ cb_code, line),
+ file=sys.stderr)
+ errs = True
+
+ # There _should_ be no need to reset the values; but to
+ # keep my inner peace...
+ cb_lineno = 0
+ cb_scolon = False
+ cb_code = ""
+
+ return errs
+
+
+ret = 0
+for filename in sys.argv[1:]:
+ if check_whitespace(filename):
+ ret = 1
+
+sys.exit(ret)
--
2.21.0