diff --git a/tests/.gitignore b/tests/.gitignore
index 122d16071..38a895f0e 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1 +1,3 @@
+cmd/check-markdown/kata-check-markdown
+cmd/github-labels/kata-github-labels
 integration/kubernetes/runtimeclass_workloads_work/
diff --git a/tests/.golangci.yml b/tests/.golangci.yml
new file mode 100644
index 000000000..5608aedba
--- /dev/null
+++ b/tests/.golangci.yml
@@ -0,0 +1,33 @@
+# Copyright (c) 2017 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+run:
+  concurrency: 4
+  deadline: 600s
+  skip-dirs:
+    - vendor
+  # Ignore auto-generated protobuf code.
+  skip-files:
+    - ".*\\.pb\\.go$"
+
+linters:
+  disable-all: true
+  enable:
+    - gocyclo
+    - gofmt
+    - gosimple
+    - govet
+    - ineffassign
+    - misspell
+    - staticcheck
+    - typecheck
+    - unused
+
+linters-settings:
+  gocyclo:
+    min-complexity: 15
+  unused:
+    check-exported: true
+  govet:
+    enable:
diff --git a/tests/go.mod b/tests/go.mod
new file mode 100644
index 000000000..61a26671c
--- /dev/null
+++ b/tests/go.mod
@@ -0,0 +1,27 @@
+module github.com/kata-containers/tests
+
+go 1.19
+
+require (
+	github.com/BurntSushi/toml v0.3.1
+	github.com/montanaflynn/stats v0.0.0-20151014174947-eeaced052adb
+	github.com/olekukonko/tablewriter v0.0.6-0.20210304033056-74c60be0ef68
+	github.com/sirupsen/logrus v1.8.1
+	github.com/stretchr/testify v1.7.1
+	github.com/urfave/cli v1.22.0
+	gopkg.in/russross/blackfriday.v2 v2.0.0-00010101000000-000000000000
+	gopkg.in/yaml.v2 v2.4.0
+)
+
+require (
+	github.com/cpuguy83/go-md2man v1.0.10 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/mattn/go-runewidth v0.0.13 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/rivo/uniseg v0.2.0 // indirect
+	github.com/russross/blackfriday v1.6.0 // indirect
+	golang.org/x/sys v0.0.0-20220429233432-b5fbb4746d32 // indirect
+	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
+)
+
+replace gopkg.in/russross/blackfriday.v2 => github.com/russross/blackfriday/v2 v2.1.0
diff --git a/tests/kata-doc-to-script.sh b/tests/kata-doc-to-script.sh
new file mode 100755
index 000000000..126073fd5
--- /dev/null
+++ b/tests/kata-doc-to-script.sh
@@ -0,0 +1,229 @@
+#!/bin/bash
+license="
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+"
+
+set -e
+
+[ -n "$DEBUG" ] && set -x
+
+script_name="${0##*/}"
+
+typeset -r warning="WARNING: Do *NOT* run the generated script without reviewing it carefully first!"
+
+# GitHub markdown markers used to surround a code block. All text within the
+# markers is rendered in a fixed font.
+typeset -r bash_block_open="\`\`\`bash"
+typeset -r block_open="\`\`\`"
+typeset -r block_close="\`\`\`"
+
+# GitHub issue templates have a special metadata section at the top delimited
+# by this string. See:
+#
+# https://raw.githubusercontent.com/kata-containers/.github/master/.github/ISSUE_TEMPLATE/bug_report.md
+typeset -r metadata_block='---'
+
+# Used to delimit inline code blocks
+typeset -r backtick="\`"
+
+# Convention used in all documentation to represent a non-privileged user's
+# shell prompt. All lines starting with this value inside a code block are
+# commands the user should run.
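+#
+# For example (illustrative), a document containing:
+#
+#   ```bash
+#   $ echo hello
+#   ```
+#
+# contributes the single command "echo hello" to the generated script.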
+typeset -r code_prompt="\$ "
+
+# files are expected to match this regular expression
+typeset -r extension_regex="\.md$"
+
+strict="no"
+require_commands="no"
+check_only="no"
+invert="no"
+verbose="no"
+
+usage()
+{
+	cat <<EOF
+Usage: ${script_name} [OPTIONS] <file> [<output-file> [<description>]]
+
+This script will convert a GitHub-flavoured markdown document file into a
+bash(1) script by extracting the bash code blocks.
+
+Options:
+
+ -c : check the file but don't create the script (sets exit code).
+ -h : show this usage.
+ -i : invert output (remove code blocks and inline code, displaying the
+      remaining parts of the document). Incompatible with '-c'.
+ -r : require at least one command block to be found.
+ -s : strict mode - perform extra checks.
+ -v : verbose mode.
+
+Example usage:
+
+ $ ${script_name} foo.md foo.md.sh
+
+Notes:
+
+- If a description is specified, it will be added to the script as a
+  comment.
+- <output-file> may be specified as '-' meaning send output to stdout.
+
+Limitations:
+
+- The script is unable to handle embedded code blocks like this:
+
+  \`\`\`
+
+  \`\`\`bash
+  \$ echo code in an embedded set of backticks
+  \`\`\`
+
+  \`\`\`
+
+  To overcome this issue, ensure that the outer set of backticks are replaced
+  with an HTML PRE tag:
+
+  <pre>
+
+      \`\`\`bash
+      \$ echo code in an embedded set of backticks
+      \`\`\`
+
+  </pre>
+ + This will both render correctly on GitHub and allow this script to remove + the code block. + + Note: this solves one problem but introduces another - this script will not + remove the HTML tags. + +${warning} + +EOF + + exit 0 +} + +die() +{ + local msg="$*" + + echo "ERROR: $msg" >&2 + exit 1 +} + +script_header() +{ + local -r description="$1" + + cat <<-EOF + #!/bin/bash + ${license} + #---------------------------------------------- + # WARNING: Script auto-generated from '$file'. + # + # ${warning} + #---------------------------------------------- + + #---------------------------------------------- + # Description: $description + #---------------------------------------------- + + # fail the entire script if any simple command fails + set -e + +EOF +} + +# Convert the specified github-flavoured markdown format file +# into a bash script by extracting the bash blocks. +doc_to_script() +{ + file="$1" + outfile="$2" + description="$3" + invert="$4" + + [ -n "$file" ] || die "need file" + + [ "${check_only}" = "no" ] && [ -z "$outfile" ] && die "need output file" + [ "$outfile" = '-' ] && outfile="/dev/stdout" + + if [ "$invert" = "yes" ] + then + # First, remove code blocks. + # Next, remove inline code in backticks. + # Finally, remove a metadata block as used in GitHub issue + # templates. + cat "$file" |\ + sed -e "/^[ \>]*${block_open}/,/^[ \>]*${block_close}/d" \ + -e "s/${backtick}[^${backtick}]*${backtick}//g" \ + -e "/^${metadata_block}$/,/^${metadata_block}$/d" \ + > "$outfile" + return + fi + + all=$(mktemp) + body=$(mktemp) + + cat "$file" |\ + sed -n "/^ *${bash_block_open}/,/^ *${block_close}/ p" |\ + sed -e "/^ *${block_close}/ d" \ + -e "s/^ *${code_prompt}//g" \ + -e 's/^ *//g' > "$body" + + [ "$require_commands" = "yes" ] && [ ! -s "$body" ] && die "no commands found in file '$file'" + + script_header "$description" > "$all" + cat "$body" >> "$all" + + # sanity check + [ "$check_only" = "yes" ] && redirect="1>/dev/null 2>/dev/null" + + { local ret; eval bash -n "$all" $redirect; ret=$?; } || true + [ "$ret" -ne 0 ] && die "shell code in file '$file' is not valid" + + # create output file + [ "$check_only" = "no" ] && cp "$all" "$outfile" + + # clean up + rm -f "$body" "$all" +} + +main() +{ + while getopts "chirsv" opt + do + case $opt in + c) check_only="yes" ;; + h) usage ;; + i) invert="yes" ;; + r) require_commands="yes" ;; + s) strict="yes" ;; + v) verbose="yes" ;; + esac + done + + shift $(($OPTIND - 1)) + + file="$1" + outfile="$2" + description="$3" + + [ -n "$file" ] || die "need file" + + [ "$verbose" = "yes" ] && echo "INFO: processing file '$file'" + + if [ "$strict" = "yes" ] + then + echo "$file"|grep -q "$extension_regex" ||\ + die "file '$file' doesn't match pattern '$extension_regex'" + fi + + doc_to_script "$file" "$outfile" "$description" "$invert" +} + +main "$@" diff --git a/tests/static-checks.sh b/tests/static-checks.sh new file mode 100755 index 000000000..8f747198c --- /dev/null +++ b/tests/static-checks.sh @@ -0,0 +1,1334 @@ +#!/usr/bin/env bash + +# Copyright (c) 2017-2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# Description: Central script to run all static checks. +# This script should be called by all other repositories to ensure +# there is only a single source of all static checks. 
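+#
+# Example invocation (illustrative):
+#
+#   $ ./static-checks.sh --golang github.com/user/repo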
+
+set -e
+
+[ -n "$DEBUG" ] && set -x
+
+cidir=$(realpath $(dirname "$0"))
+source "${cidir}/lib.sh"
+
+# By default in Golang >= 1.16 GO111MODULE is set to "on",
+# some subprojects in this repo may not support "go modules",
+# set GO111MODULE to "auto" to enable module-aware mode only when
+# a go.mod file is present in the current directory.
+export GO111MODULE="auto"
+export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
+export tests_repo_dir="${GOPATH}/src/${tests_repo}"
+
+# List of files to delete on exit
+files_to_remove=()
+
+script_name=${0##*/}
+
+# Static check functions must follow these naming conventions:
+
+# All static check function names must match this pattern.
+typeset -r check_func_regex="^static_check_"
+
+# All architecture-specific static check functions must match this pattern.
+typeset -r arch_func_regex="_arch_specific$"
+
+repo=""
+specific_branch="false"
+force="false"
+branch=${branch:-main}
+
+# Which static check functions to consider.
+handle_funcs="all"
+
+single_func_only="false"
+list_only="false"
+
+# number of seconds to wait for curl to check a URL
+typeset url_check_timeout_secs="${url_check_timeout_secs:-60}"
+
+# number of attempts that will be made to check an individual URL.
+typeset url_check_max_tries="${url_check_max_tries:-3}"
+
+typeset -A long_options
+
+# Generated code
+ignore_clh_generated_code="virtcontainers/pkg/cloud-hypervisor/client"
+
+paths_to_skip=(
+	"${ignore_clh_generated_code}"
+	"vendor"
+)
+
+# Skip paths that should not be statically checked
+# $1 : List of paths to check, space separated list
+# If you have a list in a bash array call in this way:
+# list=$(skip_paths "${list[@]}")
+# If you still want to use it as an array do:
+# list=(${list})
+skip_paths(){
+	local list_param="${1}"
+	[ -z "$list_param" ] && return
+	local list=(${list_param})
+
+	for p in "${paths_to_skip[@]}"; do
+		new_list=()
+		for l in "${list[@]}"; do
+			if echo "${l}" | grep -qv "${p}"; then
+				new_list=("${new_list[@]}" "${l}")
+			fi
+		done
+		list=("${new_list[@]}")
+	done
+	echo "${list[@]}"
+}
+
+long_options=(
+	[all]="Force checking of all changes, including files in the base branch"
+	[branch]="Specify upstream branch to compare against (default '$branch')"
+	[docs]="Check document files"
+	[dockerfiles]="Check dockerfiles"
+	[files]="Check files"
+	[force]="Force a skipped test to run"
+	[golang]="Check '.go' files"
+	[help]="Display usage statement"
+	[json]="Check JSON files"
+	[labels]="Check labels databases"
+	[licenses]="Check licenses"
+	[list]="List tests that would run"
+	[no-arch]="Run/list all tests except architecture-specific ones"
+	[only-arch]="Only run/list architecture-specific tests"
+	[repo:]="Specify GitHub URL of repo to use (github.com/user/repo)"
+	[scripts]="Check script files"
+	[vendor]="Check vendor files"
+	[versions]="Check versions files"
+	[xml]="Check XML files"
+)
+
+yamllint_cmd="yamllint"
+have_yamllint_cmd=$(command -v "$yamllint_cmd" || true)
+
+chronic=chronic
+
+# Disable chronic on OSX to avoid having to update the Travis config files
+# for additional packages on that platform.
+[ "$(uname -s)" == "Darwin" ] && chronic=
+
+usage()
+{
+	cat <<EOF
+
+Usage: $script_name help
+       $script_name [options] repo-name [true]
+
+Options:
+
+EOF
+
+	local option
+
+	# Display the long options, sorted, with their descriptions.
+	for option in $(echo "${!long_options[@]}" | tr ' ' '\n' | sort)
+	do
+		# Strip any trailing colon, which is for getopt(1) alone.
+		printf "    --%-14s # %s\n" "${option%:}" "${long_options[$option]}"
+	done
+}
+
+# Calls die() if the specified function does not exist.
+func_is_valid() {
+	local name="$1"
+
+	typeset -f "$name" &>/dev/null || die "function '$name' does not exist"
+}
+
+# Calls die() if the specified function is not valid or not a check function.
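+#
+# For example (illustrative):
+#
+#   ensure_func_is_check_func "static_check_docs"  # accepted
+#   ensure_func_is_check_func "need_chronic"       # dies: no "static_check_" prefix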
+ensure_func_is_check_func() { + local name="$1" + + func_is_valid "$name" + + { echo "$name" | grep -q "${check_func_regex}"; ret=$?; } + + [ "$ret" = 0 ] || die "function '$name' is not a check function" +} + +# Returns "yes" if the specified function needs to run on all architectures, +# else "no". +func_is_arch_specific() { + local name="$1" + + ensure_func_is_check_func "$name" + + { echo "$name" | grep -q "${arch_func_regex}"; ret=$?; } + + if [ "$ret" = 0 ]; then + echo "yes" + else + echo "no" + fi +} + +function remove_tmp_files() { + rm -rf "${files_to_remove[@]}" +} + +# Convert a golang package to a full path +pkg_to_path() +{ + local pkg="$1" + + go list -f '{{.Dir}}' "$pkg" +} + +# Check that chronic is installed, otherwise die. +need_chronic() { + local first_word + [ -z "$chronic" ] && return + first_word="${chronic%% *}" + command -v chronic &>/dev/null || \ + die "chronic command not found. You must have it installed to run this check." \ + "Usually it is distributed with the 'moreutils' package of your Linux distribution." +} + + +static_check_go_arch_specific() +{ + local go_packages + local submodule_packages + local all_packages + + # List of all golang packages found in all submodules + # + # These will be ignored: since they are references to other + # repositories, we assume they are tested independently in their + # repository so do not need to be re-tested here. + submodule_packages=$(mktemp) + git submodule -q foreach "go list ./..." | sort > "$submodule_packages" || true + + # all packages + all_packages=$(mktemp) + go list ./... | sort > "$all_packages" || true + + files_to_remove+=("$submodule_packages" "$all_packages") + + # List of packages to consider which is defined as: + # + # "all packages" - "submodule packages" + # + # Note: the vendor filtering is required for versions of go older than 1.9 + go_packages=$(comm -3 "$all_packages" "$submodule_packages" || true) + go_packages=$(skip_paths "${go_packages[@]}") + + # No packages to test + [ -z "$go_packages" ] && return + + local linter="golangci-lint" + + # Run golang checks + if [ ! "$(command -v $linter)" ] + then + info "Installing ${linter}" + + local linter_url=$(get_test_version "externals.golangci-lint.url") + local linter_version=$(get_test_version "externals.golangci-lint.version") + + info "Forcing ${linter} version ${linter_version}" + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin "${linter_version}" + command -v $linter &>/dev/null || \ + die "$linter command not found. Ensure that \"\$GOPATH/bin\" is in your \$PATH." + fi + + local linter_args="run -c ${cidir}/.golangci.yml" + + # Non-option arguments other than "./..." are + # considered to be directories by $linter, not package names. + # Hence, we need to obtain a list of package directories to check, + # excluding any that relate to submodules. + local dirs + + for pkg in $go_packages + do + path=$(pkg_to_path "$pkg") + + makefile="${path}/Makefile" + + # perform a basic build since some repos generate code which + # is required for the package to be buildable (and thus + # checkable). 
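+		# (For instance, a package whose Makefile generates protobuf
+		# bindings will not even type-check until that code exists.)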
+ [ -f "$makefile" ] && (cd "$path" && make) + + dirs+=" $path" + done + + info "Running $linter checks on the following packages:\n" + echo "$go_packages" + echo + info "Package paths:\n" + echo "$dirs" | sed 's/^ *//g' | tr ' ' '\n' + for d in ${dirs};do + info "Running $linter on $d" + (cd $d && GO111MODULE=auto eval "$linter" "${linter_args}" ".") + done + +} + +# Install yamllint in the different Linux distributions +install_yamllint() +{ + source /etc/os-release || source /usr/lib/os-release + + package="yamllint" + + case "$ID" in + centos|rhel) sudo yum -y install $package ;; + ubuntu) sudo apt-get -y install $package ;; + fedora) sudo dnf -y install $package ;; + *) die "Please install yamllint on $ID" ;; + esac + + have_yamllint_cmd=$(command -v "$yamllint_cmd" || true) + + if [ -z "$have_yamllint_cmd" ]; then + info "Cannot install $package" && return + fi +} + +# Check the "versions database". +# +# Some repositories use a versions database to maintain version information +# about non-golang dependencies. If found, check it for validity. +static_check_versions() +{ + local db="versions.yaml" + + if [ -z "$have_yamllint_cmd" ]; then + info "Installing yamllint" + install_yamllint + fi + + [ ! -e "$db" ] && return + + if [ -n "$have_yamllint_cmd" ]; then + eval "$yamllint_cmd" "$db" + else + info "Cannot check versions as $yamllint_cmd not available" + fi +} + +static_check_labels() +{ + [ $(uname -s) != Linux ] && info "Can only check labels under Linux" && return + + # Handle SLES which doesn't provide the required command. + [ -z "$have_yamllint_cmd" ] && info "Cannot check labels as $yamllint_cmd not available" && return + + # Since this script is called from another repositories directory, + # ensure the utility is built before the script below (which uses it) is run. + (cd "${tests_repo_dir}" && make github-labels) + + tmp=$(mktemp) + + files_to_remove+=("${tmp}") + + info "Checking labels for repo ${repo} using temporary combined database ${tmp}" + + bash -f "${tests_repo_dir}/cmd/github-labels/github-labels.sh" "generate" "${repo}" "${tmp}" +} + +# Ensure all files (where possible) contain an SPDX license header +static_check_license_headers() +{ + # The branch is the baseline - ignore it. 
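+	# (Otherwise, each changed file is expected to carry a header of
+	# this shape - illustrative:
+	#
+	#   # Copyright (c) 2018 Example Author
+	#   #
+	#   # SPDX-License-Identifier: Apache-2.0
+	# )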
+ [ "$specific_branch" = "true" ] && return + + # See: https://spdx.org/licenses/Apache-2.0.html + local -r spdx_tag="SPDX-License-Identifier" + local -r spdx_license="Apache-2.0" + local -r license_pattern="${spdx_tag}: ${spdx_license}" + local -r copyright_pattern="Copyright" + + local header_checks=() + + header_checks+=("SPDX license header::${license_pattern}") + header_checks+=("Copyright header:-i:${copyright_pattern}") + + files=$(get_pr_changed_file_details || true) + + # Strip off status + files=$(echo "$files"|awk '{print $NF}') + + # no files were changed + [ -z "$files" ] && info "No files found" && return + + local header_check + + for header_check in "${header_checks[@]}" + do + local desc=$(echo "$header_check"|cut -d: -f1) + local extra_args=$(echo "$header_check"|cut -d: -f2) + local pattern=$(echo "$header_check"|cut -d: -f3-) + + info "Checking $desc" + + local missing=$(egrep \ + --exclude=".git/*" \ + --exclude=".gitignore" \ + --exclude=".dockerignore" \ + --exclude="Gopkg.lock" \ + --exclude="*.gpl.c" \ + --exclude="*.ipynb" \ + --exclude="*.jpg" \ + --exclude="*.json" \ + --exclude="LICENSE*" \ + --exclude="THIRD-PARTY" \ + --exclude="*.md" \ + --exclude="*.pb.go" \ + --exclude="*pb_test.go" \ + --exclude="*.bin" \ + --exclude="*.png" \ + --exclude="*.pub" \ + --exclude="*.service" \ + --exclude="*.svg" \ + --exclude="*.drawio" \ + --exclude="*.toml" \ + --exclude="*.txt" \ + --exclude="*.dtd" \ + --exclude="vendor/*" \ + --exclude="VERSION" \ + --exclude="kata_config_version" \ + --exclude="tools/packaging/kernel/configs/*" \ + --exclude="virtcontainers/pkg/firecracker/*" \ + --exclude="${ignore_clh_generated_code}*" \ + --exclude="*.xml" \ + --exclude="*.yaml" \ + --exclude="*.yml" \ + --exclude="go.mod" \ + --exclude="go.sum" \ + --exclude="*.lock" \ + --exclude="grpc-rs/*" \ + --exclude="target/*" \ + --exclude="*.patch" \ + --exclude="*.diff" \ + --exclude="tools/packaging/static-build/qemu.blacklist" \ + --exclude="tools/packaging/qemu/default-configs/*" \ + --exclude="src/libs/protocols/protos/gogo/*.proto" \ + --exclude="src/libs/protocols/protos/google/*.proto" \ + --exclude="src/libs/*/test/texture/*" \ + -EL $extra_args "\<${pattern}\>" \ + $files || true) + + if [ -n "$missing" ]; then + cat >&2 <<-EOF + ERROR: Required $desc check ('$pattern') failed for the following files: + + $missing + +EOF + exit 1 + fi + done +} + +check_url() +{ + local url="$1" + local invalid_urls_dir="$2" + + local curl_out=$(mktemp) + files_to_remove+=("${curl_out}") + + info "Checking URL $url" + + # Process specific file to avoid out-of-order writes + local invalid_file=$(printf "%s/%d" "$invalid_urls_dir" "$$") + + local ret + local user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" + + # Authenticate for github to increase threshold for rate limiting + local curl_args=() + if [[ "$url" =~ github\.com && -n "$GITHUB_USER" && -n "$GITHUB_TOKEN" ]]; then + curl_args+=("-u ${GITHUB_USER}:${GITHUB_TOKEN}") + fi + + # Some endpoints return 403 to HEAD but 200 for GET, so perform a GET but only read headers. + { curl ${curl_args[*]} -sIL -X GET -c - -A "${user_agent}" -H "Accept-Encoding: zstd, none, gzip, deflate" --max-time "$url_check_timeout_secs" \ + --retry "$url_check_max_tries" "$url" &>"$curl_out"; ret=$?; } || true + + # A transitory error, or the URL is incorrect, + # but capture either way. 
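+	# (A --max-time expiry, for example, surfaces as curl exit
+	# status 28.)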
+ if [ "$ret" -ne 0 ]; then + echo "$url" >> "${invalid_file}" + + die "check failed for URL $url after $url_check_max_tries tries" + fi + + local http_statuses + + http_statuses=$(grep -E "^HTTP" "$curl_out" | awk '{print $2}' || true) + if [ -z "$http_statuses" ]; then + echo "$url" >> "${invalid_file}" + die "no HTTP status codes for URL $url" + fi + + local status + + for status in $http_statuses + do + # Ignore the following ranges of status codes: + # + # - 1xx: Informational codes. + # - 2xx: Success codes. + # - 3xx: Redirection codes. + # - 405: Specifically to handle some sites + # which get upset by "curl -L" when the + # redirection is not required. + # + # Anything else is considered an error. + # + # See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + + if ! echo "$status" | grep -qE "^(1[0-9][0-9]|2[0-9][0-9]|3[0-9][0-9]|405)"; then + echo "$url" >> "$invalid_file" + die "found HTTP error status codes for URL $url ($status)" + fi + done +} + +# Perform basic checks on documentation files +static_check_docs() +{ + local cmd="xurls" + + if [ ! "$(command -v $cmd)" ] + then + info "Installing $cmd utility" + + local version + local url + + version=$(get_test_version "externals.xurls.version") + url=$(get_test_version "externals.xurls.url") + + # xurls is very fussy about how it's built. + go install "${url}@${version}" + + command -v xurls &>/dev/null || + die 'xurls not found. Ensure that "$GOPATH/bin" is in your $PATH' + fi + + info "Checking documentation" + + local doc + local all_docs + local docs + local docs_status + local new_docs + local new_urls + local url + + all_docs=$(git ls-files "*.md" | grep -Ev "(grpc-rs|target)/" | sort || true) + all_docs=$(skip_paths "${all_docs[@]}") + + if [ "$specific_branch" = "true" ] + then + info "Checking all documents in $branch branch" + docs="$all_docs" + else + info "Checking local branch for changed documents only" + + docs_status=$(get_pr_changed_file_details || true) + docs_status=$(echo "$docs_status" | grep "\.md$" || true) + + docs=$(echo "$docs_status" | awk '{print $NF}' | sort) + docs=$(skip_paths "${docs[@]}") + + # Newly-added docs + new_docs=$(echo "$docs_status" | awk '/^A/ {print $NF}' | sort) + new_docs=$(skip_paths "${new_docs[@]}") + + for doc in $new_docs + do + # A new document file has been added. If that new doc + # file is referenced by any files on this PR, checking + # its URL will fail since the PR hasn't been merged + # yet. We could construct the URL based on the users + # original PR branch and validate that. But it's + # simpler to just construct the URL that the "pending + # document" *will* result in when the PR has landed + # and then check docs for that new URL and exclude + # them from the real URL check. + url="https://${repo}/blob/${branch}/${doc}" + + new_urls+=" ${url}" + done + fi + + [ -z "$docs" ] && info "No documentation to check" && return + + local urls + local url_map=$(mktemp) + local invalid_urls=$(mktemp) + local md_links=$(mktemp) + files_to_remove+=("${url_map}" "${invalid_urls}" "${md_links}") + + info "Checking document markdown references" + + local md_docs_to_check + + # All markdown docs are checked (not just those changed by a PR). This + # is necessary to guarantee that all docs are referenced. + md_docs_to_check="$all_docs" + + (cd "${tests_repo_dir}" && make check-markdown) + + command -v kata-check-markdown &>/dev/null || \ + die 'kata-check-markdown command not found. Ensure that "$GOPATH/bin" is in your $PATH.' 
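+
+	# Note: the loop below keeps only rows that "kata-check-markdown
+	# list links" tags as "external-link", and treats the third TSV
+	# column as the link target.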
+ + for doc in $md_docs_to_check + do + kata-check-markdown check "$doc" + + # Get a link of all other markdown files this doc references + kata-check-markdown list links --format tsv --no-header "$doc" |\ + grep "external-link" |\ + awk '{print $3}' |\ + sort -u >> "$md_links" + done + + # clean the list of links + local tmp + tmp=$(mktemp) + + sort -u "$md_links" > "$tmp" + mv "$tmp" "$md_links" + + # A list of markdown files that do not have to be referenced by any + # other markdown file. + exclude_doc_regexs+=() + + exclude_doc_regexs+=(^CODE_OF_CONDUCT\.md$) + exclude_doc_regexs+=(^CONTRIBUTING\.md$) + + # Magic github template files + exclude_doc_regexs+=(^\.github/.*\.md$) + + # The top level README doesn't need to be referenced by any other + # since it displayed by default when visiting the repo. + exclude_doc_regexs+=(^README\.md$) + + local exclude_pattern + + # Convert the list of files into an egrep(1) alternation pattern. + exclude_pattern=$(echo "${exclude_doc_regexs[@]}"|sed 's, ,|,g') + + # Every document in the repo (except a small handful of exceptions) + # should be referenced by another document. + for doc in $md_docs_to_check + do + # Check the ignore list for markdown files that do not need to + # be referenced by others. + echo "$doc"|egrep -q "(${exclude_pattern})" && continue + + grep -q "$doc" "$md_links" || die "Document $doc is not referenced" + done + + info "Checking document code blocks" + + local doc_to_script_cmd="${cidir}/kata-doc-to-script.sh" + + for doc in $docs + do + bash "${doc_to_script_cmd}" -csv "$doc" + + # Look for URLs in the document + urls=$("${doc_to_script_cmd}" -i "$doc" - | "$cmd") + + # Gather URLs + for url in $urls + do + printf "%s\t%s\n" "${url}" "${doc}" >> "$url_map" + done + done + + # Get unique list of URLs + urls=$(awk '{print $1}' "$url_map" | sort -u) + + info "Checking all document URLs" + local invalid_urls_dir=$(mktemp -d) + files_to_remove+=("${invalid_urls_dir}") + + for url in $urls + do + if [ "$specific_branch" != "true" ] + then + # If the URL is new on this PR, it cannot be checked. + echo "$new_urls" | egrep -q "\<${url}\>" && \ + info "ignoring new (but correct) URL: $url" && continue + fi + + # Ignore local URLs. The only time these are used is in + # examples (meaning these URLs won't exist). + echo "$url" | grep -q "^file://" && continue + echo "$url" | grep -q "^http://localhost" && continue + + # Ignore the install guide URLs that contain a shell variable + echo "$url" | grep -q "\\$" && continue + + # This prefix requires the client to be logged in to github, so ignore + echo "$url" | grep -q 'https://github.com/pulls' && continue + + # Sigh. + echo "$url"|grep -q 'https://example.com' && continue + + # Google APIs typically require an auth token. + echo "$url"|grep -q 'https://www.googleapis.com' && continue + + # Git repo URL check + if echo "$url"|grep -q '^https.*git' + then + timeout "${KATA_NET_TIMEOUT}" git ls-remote "$url" > /dev/null 2>&1 && continue + fi + + # Check the URL, saving it if invalid + # + # Each URL is checked in a separate process as each unique URL + # requires us to hit the network. 
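+		# Each child records its failures in a private per-PID file
+		# under $invalid_urls_dir, so parallel writers never
+		# interleave.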
+		check_url "$url" "$invalid_urls_dir" &
+	done
+
+	# Synchronisation point
+	wait
+
+	# Combine all the separate invalid URL files into one
+	local invalid_files=$(ls "$invalid_urls_dir")
+
+	if [ -n "$invalid_files" ]; then
+		pushd "$invalid_urls_dir" &>/dev/null
+		cat $(echo "$invalid_files"|tr '\n' ' ') > "$invalid_urls"
+		popd &>/dev/null
+	fi
+
+	if [ -s "$invalid_urls" ]
+	then
+		local files
+
+		cat "$invalid_urls" | while read url
+		do
+			files=$(grep "^${url}" "$url_map" | awk '{print $2}' | sort -u)
+			echo >&2 -e "ERROR: Invalid URL '$url' found in the following files:\n"
+
+			for file in $files
+			do
+				echo >&2 "$file"
+			done
+		done
+
+		exit 1
+	fi
+
+	# Now, spell check the docs
+	cmd="${tests_repo_dir}/cmd/check-spelling/kata-spell-check.sh"
+
+	local docs_failed=0
+	for doc in $docs
+	do
+		"$cmd" check "$doc" || { info "spell check failed for document $doc" && docs_failed=1; }
+
+		static_check_eof "$doc"
+	done
+
+	[ $docs_failed -eq 0 ] || die "spell check failed. See https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#spelling for more information."
+}
+
+static_check_eof()
+{
+	local file="$1"
+	local anchor="EOF"
+
+	[ -z "$file" ] && info "No files to check" && return
+
+	# Skip the script itself
+	[ "$file" == "$script_name" ] && return
+
+	# Skip the Vagrantfile
+	[ "$file" == "Vagrantfile" ] && return
+
+	local invalid=$(cat "$file" |\
+		egrep -o '<<-* *\w*' |\
+		sed -e 's/^<<-*//g' |\
+		tr -d ' ' |\
+		sort -u |\
+		egrep -v '^$' |\
+		egrep -v "$anchor" || true)
+	[ -z "$invalid" ] || die "Expected heredoc anchor '$anchor' in $file, found: $invalid"
+}
+
+# Tests to apply to all files.
+#
+# Currently just looks for TODO/FIXME comments that should be converted to
+# (or annotated with) an Issue URL.
+static_check_files()
+{
+	local file
+	local files
+
+	if [ "$force" = "false" ]
+	then
+		info "Skipping check_files: see https://github.com/kata-containers/tests/issues/469"
+		return
+	else
+		info "Force override of check_files skip"
+	fi
+
+	info "Checking files"
+
+	if [ "$specific_branch" = "true" ]
+	then
+		info "Checking all files in $branch branch"
+
+		files=$(git ls-files | egrep -v "/(.git|vendor|grpc-rs|target)/" || true)
+	else
+		info "Checking local branch for changed files only"
+
+		files=$(get_pr_changed_file_details || true)
+
+		# Strip off status
+		files=$(echo "$files"|awk '{print $NF}')
+	fi
+
+	[ -z "$files" ] && info "No files changed" && return
+
+	local matches=""
+
+	for file in $files
+	do
+		local match
+
+		# Look for files containing the specified comment tags but
+		# which do not include a GitHub URL.
+		match=$(egrep -H "\<TODO\>|\<FIXME\>" "$file" |\
+			grep -v "https://github.com/.*/issues/[0-9]" |\
+			cut -d: -f1 |\
+			sort -u || true)
+
+		[ -z "$match" ] && continue
+
+		# Don't fail if this script contains the patterns
+		# (as it is guaranteed to ;)
+		echo "$file" | grep -q "${script_name}$" && info "Ignoring special file $file" && continue
+
+		# We really only care about comments in code. But to avoid
+		# having to hard-code the list of file extensions to search,
+		# invert the problem by simply ignoring document files and
+		# considering all other file types.
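+		# (So 'TODO: fix this' in a .go file is flagged below, while
+		# the same text in a .md document is ignored.)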
+ echo "$file" | grep -q ".md$" && info "Ignoring comment tag in document $file" && continue + + matches+=" $match" + done + + [ -z "$matches" ] && return + + echo >&2 -n \ + "ERROR: The following files contain TODO/FIXME's that need " + echo >&2 -e "converting to issues:\n" + + for file in $matches + do + echo >&2 "$file" + done + + # spacer + echo >&2 + + exit 1 +} + +# Perform vendor checks: +# +# - Ensure that changes to vendored code are accompanied by an update to the +# vendor tooling config file. If not, the user simply hacked the vendor files +# rather than following the correct process: +# +# https://github.com/kata-containers/community/blob/main/VENDORING.md +# +# - Ensure vendor metadata is valid. +static_check_vendor() +{ + local files + local vendor_files + local result + + # Check if repo has been changed to use go modules + if [ -f "go.mod" ]; then + info "go.mod file found, running go mod verify instead" + # This verifies the integrity of modules in the local cache. + # This does not really verify the integrity of vendored code: + # https://github.com/golang/go/issues/27348 + # Once that is added we need to add an extra step to verify vendored code. + go mod verify + return + fi +} + +static_check_xml() +{ + local all_xml + local files + + need_chronic + + all_xml=$(git ls-files "*.xml" | grep -Ev "/(vendor|grpc-rs|target)/" | sort || true) + + if [ "$specific_branch" = "true" ] + then + info "Checking all XML files in $branch branch" + files="$all_xml" + else + info "Checking local branch for changed XML files only" + + local xml_status + + xml_status=$(get_pr_changed_file_details || true) + xml_status=$(echo "$xml_status" | grep "\.xml$" || true) + + files=$(echo "$xml_status" | awk '{print $NF}') + fi + + [ -z "$files" ] && info "No XML files to check" && return + + local file + + for file in $files + do + info "Checking XML file '$file'" + + local contents + + # Most XML documents are specified as XML 1.0 since, with the + # advent of XML 1.0 (Fifth Edition), XML 1.1 is "almost + # redundant" due to XML 1.0 providing the majority of XML 1.1 + # features. xmllint doesn't support XML 1.1 seemingly for this + # reason, so the only check we can do is to (crudely) force + # the document to be an XML 1.0 one since XML 1.1 documents + # can mostly be represented as XML 1.0. + # + # This is only really required since Jenkins creates XML 1.1 + # documents. 
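+		# (i.e. a prolog such as <?xml version='1.1'?> is rewritten
+		# to version '1.0' before xmllint sees it.)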
+ contents=$(sed "s/xml version='1.1'/xml version='1.0'/g" "$file") + + local ret + + { $chronic xmllint -format - <<< "$contents"; ret=$?; } || true + + [ "$ret" -eq 0 ] || die "failed to check XML file '$file'" + done +} + +static_check_shell() +{ + local all_scripts + local scripts + + need_chronic + + all_scripts=$(git ls-files "*.sh" "*.bash" | grep -Ev "/(vendor|grpc-rs|target)/" | sort || true) + + if [ "$specific_branch" = "true" ] + then + info "Checking all scripts in $branch branch" + scripts="$all_scripts" + else + info "Checking local branch for changed scripts only" + + local scripts_status + scripts_status=$(get_pr_changed_file_details || true) + scripts_status=$(echo "$scripts_status" | grep -E "\.(sh|bash)$" || true) + + scripts=$(echo "$scripts_status" | awk '{print $NF}') + fi + + [ -z "$scripts" ] && info "No scripts to check" && return 0 + + local script + + for script in $scripts + do + info "Checking script file '$script'" + + local ret + + { $chronic bash -n "$script"; ret=$?; } || true + + [ "$ret" -eq 0 ] || die "check for script '$script' failed" + + static_check_eof "$script" + done +} + +static_check_json() +{ + local all_json + local json_files + + need_chronic + + all_json=$(git ls-files "*.json" | grep -Ev "/(vendor|grpc-rs|target)/" | sort || true) + + if [ "$specific_branch" = "true" ] + then + info "Checking all JSON in $branch branch" + json_files="$all_json" + else + info "Checking local branch for changed JSON only" + + local json_status + json_status=$(get_pr_changed_file_details || true) + json_status=$(echo "$json_status" | grep "\.json$" || true) + + json_files=$(echo "$json_status" | awk '{print $NF}') + fi + + [ -z "$json_files" ] && info "No JSON files to check" && return 0 + + local json + + for json in $json_files + do + info "Checking JSON file '$json'" + + local ret + + { $chronic jq -S . "$json"; ret=$?; } || true + + [ "$ret" -eq 0 ] || die "failed to check JSON file '$json'" + done +} + +# The dockerfile checker relies on the hadolint tool. This function handle its +# installation if it is not found on PATH. +# Note that we need a specific version of the tool as it seems to not have +# backward/forward compatibility between versions. +has_hadolint_or_install() +{ + # Global variable set by the caller. It might be overwritten here. + linter_cmd=${linter_cmd:-"hadolint"} + local linter_version=$(get_test_version "externals.hadolint.version") + local linter_url=$(get_test_version "externals.hadolint.url") + local linter_dest="${GOPATH}/bin/hadolint" + + local has_linter=$(command -v "$linter_cmd") + if [[ -z "$has_linter" && "$KATA_DEV_MODE" == "yes" ]]; then + # Do not install if it is in development mode. + die "$linter_cmd command not found. You must have the version $linter_version installed to run this check." + elif [ -n "$has_linter" ]; then + # Check if the expected linter version + if $linter_cmd --version | grep -v "$linter_version" &>/dev/null; then + warn "$linter_cmd command found but not the required version $linter_version" + has_linter="" + fi + fi + + if [ -z "$has_linter" ]; then + local download_url="${linter_url}/releases/download/v${linter_version}/hadolint-Linux-x86_64" + info "Installing $linter_cmd $linter_version at $linter_dest" + + curl -sfL "$download_url" -o "$linter_dest" || \ + die "Failed to download $download_url" + chmod +x "$linter_dest" + + # Overwrite in case it cannot be found in PATH. 
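+		# ($GOPATH/bin is not guaranteed to be in PATH, but the
+		# absolute path always resolves.)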
+		linter_cmd="$linter_dest"
+	fi
+}
+
+static_check_dockerfiles()
+{
+	local all_files
+	local files
+
+	# Put here a list of files which should be ignored.
+	local ignore_files=(
+	)
+	local linter_cmd="hadolint"
+
+	all_files=$(git ls-files "*/Dockerfile*" | grep -Ev "/(vendor|grpc-rs|target)/" | sort || true)
+
+	if [ "$specific_branch" = "true" ]; then
+		info "Checking all Dockerfiles in $branch branch"
+		files="$all_files"
+	else
+		info "Checking local branch for changed Dockerfiles only"
+
+		local files_status
+		files_status=$(get_pr_changed_file_details || true)
+		files_status=$(echo "$files_status" | grep -E "Dockerfile.*$" || true)
+
+		files=$(echo "$files_status" | awk '{print $NF}')
+	fi
+
+	[ -z "$files" ] && info "No Dockerfiles to check" && return 0
+
+	# As of this writing hadolint is only distributed for x86_64
+	if [ "$(uname -m)" != "x86_64" ]; then
+		info "Skip checking as $linter_cmd is not available for $(uname -m)"
+		return 0
+	fi
+	has_hadolint_or_install
+
+	linter_cmd+=" --no-color"
+
+	# Let's not fail with INFO rules.
+	linter_cmd+=" --failure-threshold warning"
+
+	# Some rules we don't want checked, below we ignore them.
+	#
+	# "DL3008 warning: Pin versions in apt get install"
+	linter_cmd+=" --ignore DL3008"
+	# "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
+	linter_cmd+=" --ignore DL3041"
+	# "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
+	linter_cmd+=" --ignore DL3033"
+	# "DL3018 warning: Pin versions in apk add. Instead of `apk add <package>` use `apk add <package>=<version>`"
+	linter_cmd+=" --ignore DL3018"
+	# "DL3003 warning: Use WORKDIR to switch to a directory"
+	# See https://github.com/hadolint/hadolint/issues/70
+	linter_cmd+=" --ignore DL3003"
+	# "DL3048 style: Invalid label key"
+	linter_cmd+=" --ignore DL3048"
+	# "DL3037 warning: Specify version with `zypper install -y <package>=<version>`"
+	linter_cmd+=" --ignore DL3037"
+
+	local file
+	for file in $files; do
+		if echo "${ignore_files[@]}" | grep -q "$file"; then
+			info "Ignoring Dockerfile '$file'"
+			continue
+		fi
+
+		info "Checking Dockerfile '$file'"
+		local ret
+		# The linter generates an Abstract Syntax Tree (AST) from the
+		# dockerfile. Some of our dockerfiles are actually templates
+		# with special syntax, thus the linter might fail to build
+		# the AST. Here we handle Dockerfile templates.
+		if [[ "$file" =~ Dockerfile.*\.(in|template)$ ]]; then
+			# In our templates, text with a marker such as @SOME_NAME@
+			# is replaceable. Usually it is used for substitution in a
+			# FROM command (e.g. `FROM @UBUNTU_REGISTRY@/ubuntu`)
+			# but also to add an entire block of commands. Example
+			# of the latter:
+			# ```
+			# RUN apt-get install -y package1
+			# @INSTALL_MUSL@
+			# @INSTALL_RUST@
+			# ```
+			# It's known that the linter will fail to parse lines
+			# starting with `@`. Also it might give false positives
+			# in some cases. Here we remove all markers as a best
+			# effort approach. If the template file is still
+			# unparseable then it should be added to the
+			# `$ignore_files` list.
+			{ sed -e 's/^@[A-Z_]*@//' -e 's/@\([a-zA-Z_]*\)@/\1/g' "$file" | $linter_cmd -; ret=$?; }\
+				|| true
+		else
+			# Non-template Dockerfile.
+			{ $linter_cmd "$file"; ret=$?; } || true
+		fi
+
+		[ "$ret" -eq 0 ] || die "failed to check Dockerfile '$file'"
+	done
+}
+
+# Run the specified function (after first checking it is compatible with the
+# user's architectural preferences), or simply list the function name if list
+# mode is active.
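+#
+# Examples (illustrative):
+#
+#   run_or_list_check_function "static_check_json"
+#   run_or_list_check_function "static_check_go_arch_specific"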
+run_or_list_check_function()
+{
+	local name="$1"
+
+	func_is_valid "$name"
+
+	local arch_func
+	local handler
+
+	arch_func=$(func_is_arch_specific "$name")
+
+	handler="info"
+
+	# If the user requested only a single function to run, we should die
+	# if the function cannot be run due to the other options specified.
+	#
+	# Whereas if this script is running all functions, just display an
+	# info message if a function cannot be run.
+	[ "$single_func_only" = "true" ] && handler="die"
+
+	if [ "$handle_funcs" = "arch-agnostic" ] && [ "$arch_func" = "yes" ]; then
+		if [ "$list_only" != "true" ]; then
+			"$handler" "Not running '$name' as no architecture-specific functions were requested"
+		fi
+
+		return 0
+	fi
+
+	if [ "$handle_funcs" = "arch-specific" ] && [ "$arch_func" = "no" ]; then
+		if [ "$list_only" != "true" ]; then
+			"$handler" "Not running architecture-agnostic function '$name' as only architecture-specific functions were requested"
+		fi
+
+		return 0
+	fi
+
+	if [ "$list_only" = "true" ]; then
+		echo "$name"
+		return 0
+	fi
+
+	info "Running '$name' function"
+	eval "$name"
+}
+
+main()
+{
+	trap remove_tmp_files EXIT
+
+	local long_option_names="${!long_options[@]}"
+
+	local args
+
+	args=$(getopt \
+		-n "$script_name" \
+		-a \
+		--options="h" \
+		--longoptions="$long_option_names" \
+		-- "$@")
+	[ $? -eq 0 ] || { usage >&2; exit 1; }
+
+	eval set -- "$args"
+
+	local func=
+
+	while [ $# -gt 1 ]
+	do
+		case "$1" in
+			--all) specific_branch="true" ;;
+			--branch) branch="$2"; shift ;;
+			--commits) func=static_check_commits ;;
+			--docs) func=static_check_docs ;;
+			--dockerfiles) func=static_check_dockerfiles ;;
+			--files) func=static_check_files ;;
+			--force) force="true" ;;
+			--golang) func=static_check_go_arch_specific ;;
+			-h|--help) usage; exit 0 ;;
+			--json) func=static_check_json ;;
+			--labels) func=static_check_labels ;;
+			--licenses) func=static_check_license_headers ;;
+			--list) list_only="true" ;;
+			--no-arch) handle_funcs="arch-agnostic" ;;
+			--only-arch) handle_funcs="arch-specific" ;;
+			--repo) repo="$2"; shift ;;
+			--scripts) func=static_check_shell ;;
+			--vendor) func=static_check_vendor ;;
+			--versions) func=static_check_versions ;;
+			--xml) func=static_check_xml ;;
+			--) shift; break ;;
+		esac
+
+		shift
+	done
+
+	# Consume getopt cruft
+	[ "$1" = "--" ] && shift
+
+	[ "$1" = "help" ] && usage && exit 0
+
+	# Set if not already set by options
+	[ -z "$repo" ] && repo="$1"
+	[ "$specific_branch" = "false" ] && specific_branch="$2"
+
+	if [ -z "$repo" ]
+	then
+		if [ -n "$KATA_DEV_MODE" ]
+		then
+			# No repo param provided so assume it's the current
+			# one to avoid developers having to specify one now
+			# (backwards compatibility).
+			repo=$(git config --get remote.origin.url |\
+				sed 's!https://!!g' || true)
+
+			info "Auto-detected repo as $repo"
+		else
+			if [ "$list_only" != "true" ]; then
+				echo >&2 "ERROR: need repo" && usage && exit 1
+			fi
+		fi
+	fi
+
+	local all_check_funcs=$(typeset -F|awk '{print $3}'|grep "${check_func_regex}"|sort)
+
+	# Run user-specified check and quit
+	if [ -n "$func" ]; then
+		single_func_only="true"
+		run_or_list_check_function "$func"
+		exit 0
+	fi
+
+	for func in $all_check_funcs
+	do
+		run_or_list_check_function "$func"
+	done
+}
+
+main "$@"
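With the diff applied, a typical local run of the new checks might look like this (an illustrative sketch; it assumes a checkout of the tests repository with `lib.sh` present and `GOPATH` configured):

```bash
# List the checks that would run, without executing any of them.
$ ./static-checks.sh --list github.com/user/repo

# Run only the shell script checks, against the whole main branch.
$ ./static-checks.sh --scripts --all github.com/user/repo

# Convert a document's bash blocks into a reviewable script.
$ ./kata-doc-to-script.sh README.md README.md.sh "demo"
```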