Merge pull request #12557 from eparis/boilerplate-multiple-file-types

Speed up pre-commit boilerplate by only checking changed files
This commit is contained in:
Brendan Burns 2015-08-14 16:26:00 -07:00
commit d7763a1b2b
3 changed files with 75 additions and 49 deletions

View File

@ -16,22 +16,44 @@
from __future__ import print_function
import argparse
import glob
import json
import mmap
import os
import re
import sys
def PrintError(*err):
print(*err, file=sys.stderr)
parser = argparse.ArgumentParser()
parser.add_argument("filenames", help="list of files to check, all files if unspecified", nargs='*')
args = parser.parse_args()
def file_passes(filename, extension, ref, regexs):
rootdir = os.path.dirname(__file__) + "/../../"
rootdir = os.path.abspath(rootdir)
def get_refs():
refs = {}
for path in glob.glob(os.path.join(rootdir, "hack/boilerplate/boilerplate.*.txt")):
extension = os.path.basename(path).split(".")[1]
ref_file = open(path, 'r')
ref = ref_file.read().splitlines()
ref_file.close()
refs[extension] = ref
return refs
def file_passes(filename, refs, regexs):
try:
f = open(filename, 'r')
except:
return False
data = f.read()
f.close()
extension = file_extension(filename)
ref = refs[extension]
# remove build tags from the top of Go files
if extension == "go":
@ -70,25 +92,48 @@ def file_passes(filename, extension, ref, regexs):
return True
def main():
if len(sys.argv) < 3:
PrintError("usage: %s extension FILENAME [FILENAMES]" % sys.argv[0])
return False
def file_extension(filename):
return os.path.splitext(filename)[1].split(".")[-1].lower()
basedir = os.path.dirname(os.path.abspath(__file__))
skipped_dirs = ['Godeps', 'third_party', '_output', '.git']
def normalize_files(files):
newfiles = []
for pathname in files:
if any(x in pathname for x in skipped_dirs):
continue
newfiles.append(pathname)
for i, pathname in enumerate(newfiles):
if not os.path.isabs(pathname):
newfiles[i] = os.path.join(rootdir, pathname)
return newfiles
extension = sys.argv[1]
# argv[0] is the binary, argv[1] is the extension (go, sh, py, whatever)
filenames = sys.argv[2:]
def get_files(extensions):
files = []
if len(args.filenames) > 0:
files = args.filenames
else:
for root, dirs, walkfiles in os.walk(rootdir):
# don't visit certain dirs. This is just a performance improvement
# as we would prune these later in normalize_files(). But doing it
# cuts down the amount of filesystem walking we do and cuts down
# the size of the file list
for d in skipped_dirs:
if d in dirs:
dirs.remove(d)
ref_filename = basedir + "/boilerplate." + extension + ".txt"
try:
ref_file = open(ref_filename, 'r')
except:
# No boilerplate template is success
return True
ref = ref_file.read().splitlines()
for name in walkfiles:
pathname = os.path.join(root, name)
files.append(pathname)
files = normalize_files(files)
outfiles = []
for pathname in files:
extension = file_extension(pathname)
if extension in extensions:
outfiles.append(pathname)
return outfiles
def get_regexs():
regexs = {}
# Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
regexs["year"] = re.compile( 'YEAR' )
@ -98,9 +143,15 @@ def main():
regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE)
# strip #!.* from shell scripts
regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
return regexs
def main():
regexs = get_regexs()
refs = get_refs()
filenames = get_files(refs.keys())
for filename in filenames:
if not file_passes(filename, extension, ref, regexs):
if not file_passes(filename, refs, regexs):
print(filename, file=sys.stdout)
if __name__ == "__main__":

View File

@ -21,33 +21,7 @@ set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/..
boiler="${KUBE_ROOT}/hack/boilerplate/boilerplate.py"
cd ${KUBE_ROOT}
find_files() {
local ext=$1
find . -not \( \
\( \
-wholename './output' \
-o -wholename './_output' \
-o -wholename './release' \
-o -wholename './target' \
-o -wholename './.git' \
-o -wholename '*/third_party/*' \
-o -wholename '*/Godeps/*' \
\) -prune \
\) -name "*.${ext}"
}
files_need_boilerplate=()
files=($(find_files "go"))
files_need_boilerplate+=($(${boiler} "go" "${files[@]}"))
files=($(find_files "sh"))
files_need_boilerplate+=($(${boiler} "sh" "${files[@]}"))
files=($(find_files "py"))
files_need_boilerplate+=($(${boiler} "py" "${files[@]}"))
files_need_boilerplate=($(${boiler} "$@"))
if [[ ${#files_need_boilerplate[@]} -gt 0 ]]; then
for file in "${files_need_boilerplate[@]}"; do

View File

@ -39,11 +39,12 @@ fi
echo "${reset}"
echo -ne "Checking for files that need boilerplate... "
out=($(hack/verify-boilerplate.sh))
if [[ $? -ne 0 ]]; then
files=($(git diff --cached --name-only --diff-filter ACM))
out=($(hack/boilerplate/boilerplate.py "${files[@]}"))
if [[ "${#out}" -ne 0 ]]; then
echo "${red}ERROR!"
echo "Some files are missing the required boilerplate header"
echo "from hooks/boilerplate.txt:"
echo "from hack/boilerplate/boilerplate.*.txt:"
for f in "${out[@]}"; do
echo " ${f}"
done