From 8a91d8678312f12c3f3e35ec540bdbd6c70085c5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sat, 9 May 2015 12:21:10 -0400 Subject: [PATCH] Speed up pre-commit boilerplate by only checking changed files Although the boilerplate checker was very fast it can be faster. With this change we can hand the boilerplate checker a list of files which need to be checked or give it no files. If given no files it will check all files in the repo. Before, you had to explicitly tell the boilerplate checker the 'extension' of the files. In this case we let the checker figure it out and load the headers as needed. Doing the whole repo takes about 0.4 seconds. Doing a single go file takes < .04 seconds. --- hack/boilerplate/boilerplate.py | 89 ++++++++++++++++++++++++++------- hack/verify-boilerplate.sh | 28 +---------- hooks/pre-commit | 7 +-- 3 files changed, 75 insertions(+), 49 deletions(-) diff --git a/hack/boilerplate/boilerplate.py b/hack/boilerplate/boilerplate.py index f04068e545b..7c788f54d0a 100755 --- a/hack/boilerplate/boilerplate.py +++ b/hack/boilerplate/boilerplate.py @@ -16,22 +16,44 @@ from __future__ import print_function +import argparse +import glob import json import mmap import os import re import sys -def PrintError(*err): - print(*err, file=sys.stderr) +parser = argparse.ArgumentParser() +parser.add_argument("filenames", help="list of files to check, all files if unspecified", nargs='*') +args = parser.parse_args() -def file_passes(filename, extension, ref, regexs): +rootdir = os.path.dirname(__file__) + "/../../" +rootdir = os.path.abspath(rootdir) + +def get_refs(): + refs = {} + for path in glob.glob(os.path.join(rootdir, "hack/boilerplate/boilerplate.*.txt")): + extension = os.path.basename(path).split(".")[1] + + ref_file = open(path, 'r') + ref = ref_file.read().splitlines() + ref_file.close() + refs[extension] = ref + + return refs + +def file_passes(filename, refs, regexs): try: f = open(filename, 'r') except: return False data = f.read() + f.close() + + 
extension = file_extension(filename) + ref = refs[extension] # remove build tags from the top of Go files if extension == "go": @@ -70,25 +92,48 @@ def file_passes(filename, extension, ref, regexs): return True -def main(): - if len(sys.argv) < 3: - PrintError("usage: %s extension FILENAME [FILENAMES]" % sys.argv[0]) - return False +def file_extension(filename): + return os.path.splitext(filename)[1].split(".")[-1].lower() - basedir = os.path.dirname(os.path.abspath(__file__)) +skipped_dirs = ['Godeps', 'third_party', '_output', '.git'] +def normalize_files(files): + newfiles = [] + for pathname in files: + if any(x in pathname for x in skipped_dirs): + continue + newfiles.append(pathname) + for i, pathname in enumerate(newfiles): + if not os.path.isabs(pathname): + newfiles[i] = os.path.join(rootdir, pathname) + return newfiles - extension = sys.argv[1] - # argv[0] is the binary, argv[1] is the extension (go, sh, py, whatever) - filenames = sys.argv[2:] +def get_files(extensions): + files = [] + if len(args.filenames) > 0: + files = args.filenames + else: + for root, dirs, walkfiles in os.walk(rootdir): + # don't visit certain dirs. This is just a performance improvement + # as we would prune these later in normalize_files(). But doing it + # cuts down the amount of filesystem walking we do and cuts down + # the size of the file list + for d in skipped_dirs: + if d in dirs: + dirs.remove(d) - ref_filename = basedir + "/boilerplate." 
+ extension + ".txt" - try: - ref_file = open(ref_filename, 'r') - except: - # No boilerplate template is success - return True - ref = ref_file.read().splitlines() + for name in walkfiles: + pathname = os.path.join(root, name) + files.append(pathname) + files = normalize_files(files) + outfiles = [] + for pathname in files: + extension = file_extension(pathname) + if extension in extensions: + outfiles.append(pathname) + return outfiles + +def get_regexs(): regexs = {} # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing regexs["year"] = re.compile( 'YEAR' ) @@ -98,9 +143,15 @@ def main(): regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE) # strip #!.* from shell scripts regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE) + return regexs + +def main(): + regexs = get_regexs() + refs = get_refs() + filenames = get_files(refs.keys()) for filename in filenames: - if not file_passes(filename, extension, ref, regexs): + if not file_passes(filename, refs, regexs): print(filename, file=sys.stdout) if __name__ == "__main__": diff --git a/hack/verify-boilerplate.sh b/hack/verify-boilerplate.sh index c08113460f8..8d0c0fe656c 100755 --- a/hack/verify-boilerplate.sh +++ b/hack/verify-boilerplate.sh @@ -21,33 +21,7 @@ set -o pipefail KUBE_ROOT=$(dirname "${BASH_SOURCE}")/.. boiler="${KUBE_ROOT}/hack/boilerplate/boilerplate.py" -cd ${KUBE_ROOT} - -find_files() { - local ext=$1 - find . 
-not \( \ - \( \ - -wholename './output' \ - -o -wholename './_output' \ - -o -wholename './release' \ - -o -wholename './target' \ - -o -wholename './.git' \ - -o -wholename '*/third_party/*' \ - -o -wholename '*/Godeps/*' \ - \) -prune \ - \) -name "*.${ext}" -} - -files_need_boilerplate=() - -files=($(find_files "go")) -files_need_boilerplate+=($(${boiler} "go" "${files[@]}")) - -files=($(find_files "sh")) -files_need_boilerplate+=($(${boiler} "sh" "${files[@]}")) - -files=($(find_files "py")) -files_need_boilerplate+=($(${boiler} "py" "${files[@]}")) +files_need_boilerplate=($(${boiler} "$@")) if [[ ${#files_need_boilerplate[@]} -gt 0 ]]; then for file in "${files_need_boilerplate[@]}"; do diff --git a/hooks/pre-commit b/hooks/pre-commit index 6b845d89134..9b7733965dc 100755 --- a/hooks/pre-commit +++ b/hooks/pre-commit @@ -39,11 +39,12 @@ fi echo "${reset}" echo -ne "Checking for files that need boilerplate... " -out=($(hack/verify-boilerplate.sh)) -if [[ $? -ne 0 ]]; then +files=($(git diff --cached --name-only --diff-filter ACM)) +out=($(hack/boilerplate/boilerplate.py "${files[@]}")) +if [[ "${#out}" -ne 0 ]]; then echo "${red}ERROR!" echo "Some files are missing the required boilerplate header" - echo "from hooks/boilerplate.txt:" + echo "from hack/boilerplate/boilerplate.*.txt:" for f in "${out[@]}"; do echo " ${f}" done