Merge pull request #12557 from eparis/boilerplate-multiple-file-types

Speed up pre-commit boilerplate by only checking changed files
This commit is contained in:
Brendan Burns 2015-08-14 16:26:00 -07:00
commit d7763a1b2b
3 changed files with 75 additions and 49 deletions

View File

@ -16,22 +16,44 @@
from __future__ import print_function from __future__ import print_function
import argparse
import glob
import json import json
import mmap import mmap
import os import os
import re import re
import sys import sys
def PrintError(*err): parser = argparse.ArgumentParser()
print(*err, file=sys.stderr) parser.add_argument("filenames", help="list of files to check, all files if unspecified", nargs='*')
args = parser.parse_args()
def file_passes(filename, extension, ref, regexs): rootdir = os.path.dirname(__file__) + "/../../"
rootdir = os.path.abspath(rootdir)
def get_refs():
    """Load the boilerplate reference text for every known file type.

    Every file matching ``hack/boilerplate/boilerplate.<ext>.txt`` under
    ``rootdir`` is read; ``<ext>`` (the second dot-separated field of the
    file's base name) becomes the dictionary key.

    Returns:
        dict mapping an extension string to the list of lines of the
        matching template, as produced by ``str.splitlines()``.
    """
    refs = {}
    pattern = os.path.join(rootdir, "hack/boilerplate/boilerplate.*.txt")
    for path in glob.glob(pattern):
        extension = os.path.basename(path).split(".")[1]
        # "with" closes the handle even if read() raises, which the
        # explicit open()/close() pair did not guarantee.
        with open(path, 'r') as ref_file:
            refs[extension] = ref_file.read().splitlines()
    return refs
def file_passes(filename, refs, regexs):
try: try:
f = open(filename, 'r') f = open(filename, 'r')
except: except:
return False return False
data = f.read() data = f.read()
f.close()
extension = file_extension(filename)
ref = refs[extension]
# remove build tags from the top of Go files # remove build tags from the top of Go files
if extension == "go": if extension == "go":
@ -70,25 +92,48 @@ def file_passes(filename, extension, ref, regexs):
return True return True
def main(): def file_extension(filename):
if len(sys.argv) < 3: return os.path.splitext(filename)[1].split(".")[-1].lower()
PrintError("usage: %s extension FILENAME [FILENAMES]" % sys.argv[0])
return False
basedir = os.path.dirname(os.path.abspath(__file__)) skipped_dirs = ['Godeps', 'third_party', '_output', '.git']
def normalize_files(files):
    """Drop paths in skipped directories and make the rest absolute.

    A path is dropped when any entry of ``skipped_dirs`` occurs anywhere
    in it. NOTE: this is a plain substring test, so a name that merely
    contains one of the entries is dropped as well.

    Paths that are not already absolute are joined onto ``rootdir``.

    Returns:
        A new list of the surviving paths, in their original order.
    """
    kept = [
        candidate for candidate in files
        if not any(skip in candidate for skip in skipped_dirs)
    ]
    return [
        candidate if os.path.isabs(candidate)
        else os.path.join(rootdir, candidate)
        for candidate in kept
    ]
extension = sys.argv[1] def get_files(extensions):
# argv[0] is the binary, argv[1] is the extension (go, sh, py, whatever) files = []
filenames = sys.argv[2:] if len(args.filenames) > 0:
files = args.filenames
else:
for root, dirs, walkfiles in os.walk(rootdir):
# don't visit certain dirs. This is just a performance improvement
# as we would prune these later in normalize_files(). But doing it
# cuts down the amount of filesystem walking we do and cuts down
# the size of the file list
for d in skipped_dirs:
if d in dirs:
dirs.remove(d)
ref_filename = basedir + "/boilerplate." + extension + ".txt" for name in walkfiles:
try: pathname = os.path.join(root, name)
ref_file = open(ref_filename, 'r') files.append(pathname)
except:
# No boilerplate template is success
return True
ref = ref_file.read().splitlines()
files = normalize_files(files)
outfiles = []
for pathname in files:
extension = file_extension(pathname)
if extension in extensions:
outfiles.append(pathname)
return outfiles
def get_regexs():
regexs = {} regexs = {}
# Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
regexs["year"] = re.compile( 'YEAR' ) regexs["year"] = re.compile( 'YEAR' )
@ -98,9 +143,15 @@ def main():
regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE) regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE)
# strip #!.* from shell scripts # strip #!.* from shell scripts
regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE) regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
return regexs
def main():
regexs = get_regexs()
refs = get_refs()
filenames = get_files(refs.keys())
for filename in filenames: for filename in filenames:
if not file_passes(filename, extension, ref, regexs): if not file_passes(filename, refs, regexs):
print(filename, file=sys.stdout) print(filename, file=sys.stdout)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -21,33 +21,7 @@ set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/.. KUBE_ROOT=$(dirname "${BASH_SOURCE}")/..
boiler="${KUBE_ROOT}/hack/boilerplate/boilerplate.py" boiler="${KUBE_ROOT}/hack/boilerplate/boilerplate.py"
cd ${KUBE_ROOT} files_need_boilerplate=($(${boiler} "$@"))
# find_files EXT
#
# Prints every path under the current directory whose name matches "*.EXT".
# The search prunes ./output, ./_output, ./release, ./target and ./.git, as
# well as anything located under a third_party/ or Godeps/ directory, so
# files in those trees are never listed.
find_files() {
local ext=$1
find . -not \( \
\( \
-wholename './output' \
-o -wholename './_output' \
-o -wholename './release' \
-o -wholename './target' \
-o -wholename './.git' \
-o -wholename '*/third_party/*' \
-o -wholename '*/Godeps/*' \
\) -prune \
\) -name "*.${ext}"
}
files_need_boilerplate=()
files=($(find_files "go"))
files_need_boilerplate+=($(${boiler} "go" "${files[@]}"))
files=($(find_files "sh"))
files_need_boilerplate+=($(${boiler} "sh" "${files[@]}"))
files=($(find_files "py"))
files_need_boilerplate+=($(${boiler} "py" "${files[@]}"))
if [[ ${#files_need_boilerplate[@]} -gt 0 ]]; then if [[ ${#files_need_boilerplate[@]} -gt 0 ]]; then
for file in "${files_need_boilerplate[@]}"; do for file in "${files_need_boilerplate[@]}"; do

View File

@ -39,11 +39,12 @@ fi
echo "${reset}" echo "${reset}"
echo -ne "Checking for files that need boilerplate... " echo -ne "Checking for files that need boilerplate... "
out=($(hack/verify-boilerplate.sh)) files=($(git diff --cached --name-only --diff-filter ACM))
if [[ $? -ne 0 ]]; then out=($(hack/boilerplate/boilerplate.py "${files[@]}"))
if [[ "${#out}" -ne 0 ]]; then
echo "${red}ERROR!" echo "${red}ERROR!"
echo "Some files are missing the required boilerplate header" echo "Some files are missing the required boilerplate header"
echo "from hooks/boilerplate.txt:" echo "from hack/boilerplate/boilerplate.*.txt:"
for f in "${out[@]}"; do for f in "${out[@]}"; do
echo " ${f}" echo " ${f}"
done done