From 3ad43df94613e574c176cdd49ee9bef0b18dfecf Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Wed, 6 Dec 2023 15:29:18 +0000 Subject: [PATCH 1/8] CI: static-checks: Improve markdown checker test Only attempt to build the markdown checker if it doesn't already exist. Signed-off-by: James O. D. Hunt --- tests/static-checks.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/static-checks.sh b/tests/static-checks.sh index 925306c7b..a1b606ec6 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -647,7 +647,8 @@ static_check_docs() # is necessary to guarantee that all docs are referenced. md_docs_to_check="$all_docs" - (cd "${test_dir}" && make -C cmd/check-markdown) + command -v kata-check-markdown &>/dev/null ||\ + (cd "${test_dir}" && make -C cmd/check-markdown) command -v kata-check-markdown &>/dev/null || \ die 'kata-check-markdown command not found. Ensure that "$GOPATH/bin" is in your $PATH.' From 563ea020b0864f6cddd0174778eb249ff00c0622 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Wed, 6 Dec 2023 15:30:18 +0000 Subject: [PATCH 2/8] CI: static-checks: Fold long line Break up a long line as little to make it easier to read. Signed-off-by: James O. D. Hunt --- tests/static-checks.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/static-checks.sh b/tests/static-checks.sh index a1b606ec6..0635fc8c6 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -811,7 +811,10 @@ static_check_docs() popd - [ $docs_failed -eq 0 ] || die "spell check failed, See https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#spelling for more information." + [ $docs_failed -eq 0 ] || { + url='https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#spelling' + die "spell check failed, See $url for more information." + } } static_check_eof() From efa8e6547c9aa56150027f2b88b9fbd77a08b2b2 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Wed, 6 Dec 2023 15:32:37 +0000 Subject: [PATCH 3/8] CI: static-checks: Check params have a value Check that the `check_url()` parameters have a value. Signed-off-by: James O. D. Hunt --- tests/static-checks.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/static-checks.sh b/tests/static-checks.sh index 0635fc8c6..037b18104 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -494,8 +494,11 @@ EOF check_url() { - local url="$1" - local invalid_urls_dir="$2" + local url="${1:-}" + [ -n "$url" ] || die "need URL to check" + + local invalid_urls_dir="${2:-}" + [ -n "$invalid_urls_dir" ] || die "need invalid URLs directory" local curl_out=$(mktemp) files_to_remove+=("${curl_out}") From 6d859f97eec96fb4dc096e299bb77709f0e83f52 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Wed, 6 Dec 2023 15:34:57 +0000 Subject: [PATCH 4/8] CI: static-checks: Lint fixes Declare and then define a couple of variables separately. Signed-off-by: James O. D. Hunt --- tests/static-checks.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/static-checks.sh b/tests/static-checks.sh index 037b18104..de8286873 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -500,13 +500,16 @@ check_url() local invalid_urls_dir="${2:-}" [ -n "$invalid_urls_dir" ] || die "need invalid URLs directory" - local curl_out=$(mktemp) + local curl_out + curl_out=$(mktemp) + files_to_remove+=("${curl_out}") info "Checking URL $url" # Process specific file to avoid out-of-order writes - local invalid_file=$(printf "%s/%d" "$invalid_urls_dir" "$$") + local invalid_file + invalid_file=$(printf "%s/%d" "$invalid_urls_dir" "$$") local ret local user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" From 613def0328cdc1c87e272a1c67b4eabec1441204 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Wed, 6 Dec 2023 15:41:32 +0000 Subject: [PATCH 5/8] CI: static-checks: Move curl to a separate function Split the call to `curl` in the URL checker out into a new `run_url_check_cmd()` function to make `check_url()` slightly clearer. Signed-off-by: James O. D. Hunt --- tests/static-checks.sh | 45 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/tests/static-checks.sh b/tests/static-checks.sh index de8286873..8a6ea508b 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -492,6 +492,40 @@ EOF popd } +run_url_check_cmd() +{ + local url="${1:-}" + [ -n "$url" ] || die "need URL" + + local out_file="${2:-}" + [ -n "$out_file" ] || die "need output file" + + # Can be blank + local extra_args="${3:-}" + + local curl_extra_args=() + + curl_extra_args+=("$extra_args") + + # Authenticate for github to increase threshold for rate limiting + if [[ "$url" =~ github\.com && -n "$GITHUB_USER" && -n "$GITHUB_TOKEN" ]]; then + curl_extra_args+=("-u ${GITHUB_USER}:${GITHUB_TOKEN}") + fi + + # Some endpoints return 403 to HEAD but 200 for GET, + # so perform a GET but only read headers. + curl \ + ${curl_extra_args[*]} \ + -sIL \ + -X GET \ + -c - \ + -H "Accept-Encoding: zstd, none, gzip, deflate" \ + --max-time "$url_check_timeout_secs" \ + --retry "$url_check_max_tries" \ + "$url" \ + &>"$out_file" +} + check_url() { local url="${1:-}" @@ -514,15 +548,10 @@ check_url() local ret local user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" - # Authenticate for github to increase threshold for rate limiting - local curl_args=() - if [[ "$url" =~ github\.com && -n "$GITHUB_USER" && -n "$GITHUB_TOKEN" ]]; then - curl_args+=("-u ${GITHUB_USER}:${GITHUB_TOKEN}") - fi + local curl_ua_args + curl_ua_args="-A '$user_agent'" - # Some endpoints return 403 to HEAD but 200 for GET, so perform a GET but only read headers. - { curl ${curl_args[*]} -sIL -X GET -c - -A "${user_agent}" -H "Accept-Encoding: zstd, none, gzip, deflate" --max-time "$url_check_timeout_secs" \ - --retry "$url_check_max_tries" "$url" &>"$curl_out"; ret=$?; } || true + { run_url_check_cmd "$url" "$curl_out" "$curl_ua_args"; ret=$?; } || true # A transitory error, or the URL is incorrect, # but capture either way. From 3779261a99c9f42f81821a202ed20b1935e24ede Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Fri, 8 Dec 2023 17:05:22 +0000 Subject: [PATCH 6/8] docs: Fix whitespace Remove some extraneous whitespace. Signed-off-by: James O. D. Hunt --- docs/install/azure-installation-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/install/azure-installation-guide.md b/docs/install/azure-installation-guide.md index 53cbf2600..b62bcf824 100644 --- a/docs/install/azure-installation-guide.md +++ b/docs/install/azure-installation-guide.md @@ -3,8 +3,8 @@ Kata Containers on Azure use nested virtualization to provide an identical installation experience to Kata on your preferred Linux distribution. -This guide assumes you have an Azure account set up and tools to remotely login to your virtual -machine (SSH). Instructions will use [Azure Portal](https://portal.azure.com/) to avoid +This guide assumes you have an Azure account set up and tools to remotely login to your virtual +machine (SSH). Instructions will use [Azure Portal](https://portal.azure.com/) to avoid local dependencies and setup. ## Create a new virtual machine with nesting support From 3174c18772209fdec2212bb36a2622f9bd6ded68 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Fri, 8 Dec 2023 17:06:02 +0000 Subject: [PATCH 7/8] docs: Remove problematic URL Removed the Azure Portal URL (https://portal.azure.com) since this causes problems with our static checks script: that URL returns HTTP 403 ("Forbidden") when queried using command-line tools like `curl(1)`, which is used by the static check script. Signed-off-by: James O. D. Hunt --- docs/install/azure-installation-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/install/azure-installation-guide.md b/docs/install/azure-installation-guide.md index b62bcf824..dc406064f 100644 --- a/docs/install/azure-installation-guide.md +++ b/docs/install/azure-installation-guide.md @@ -4,7 +4,7 @@ Kata Containers on Azure use nested virtualization to provide an identical insta experience to Kata on your preferred Linux distribution. This guide assumes you have an Azure account set up and tools to remotely login to your virtual -machine (SSH). Instructions will use [Azure Portal](https://portal.azure.com/) to avoid +machine (SSH). Instructions will use the Azure Portal to avoid local dependencies and setup. ## Create a new virtual machine with nesting support From 5d085a3042fe5756a90703acf6362531bef5fef3 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Wed, 6 Dec 2023 15:58:42 +0000 Subject: [PATCH 8/8] CI: static-checks: Try multiple user agents Make the URL checker cycle through a list of user agent values until we hit one the remote server is happy with. This is required since, unfortunately, we really, really want to check these URLs, but some sites block clients based on their `User-Agent` (UA) request header value. And of course, each site is different and can change its behaviour at any time. Our strategy therefore is to try various UA's until we find one the server accepts: - No explicit UA (use `curl`'s default) - Explicitly no UA. - A blank UA. - Partial UA values for various CLI tools. - Partial UA values for various console web browsers. - Partial UA for Emacs's built-in browser. - The existing UA which is used as a "last ditch" attempt where the UA implies multiple platforms and browser. > **Notes:** > > - The "partial UA" values specify specify the UA "product" but not the > UA "product version": we specify `foo` and not `foo/1.2.3`). We do > this since most sites tested appear to not care about the version. > This is as expected given that the version is strictly optional (see `[*]`). > > - We now log all errors and display an error summary if none of the UAs > worked, in addition to the simple list of the URLs we believe to be > invalid. This should make future debugging simpler. `[*]` - https://www.rfc-editor.org/rfc/rfc9110#section-10.1.5 Fixes: #8553. Signed-off-by: James O. D. Hunt --- tests/static-checks.sh | 129 ++++++++++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 35 deletions(-) diff --git a/tests/static-checks.sh b/tests/static-checks.sh index 8a6ea508b..84341e7ff 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -539,58 +539,117 @@ check_url() files_to_remove+=("${curl_out}") - info "Checking URL $url" - # Process specific file to avoid out-of-order writes local invalid_file invalid_file=$(printf "%s/%d" "$invalid_urls_dir" "$$") local ret - local user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" - local curl_ua_args - curl_ua_args="-A '$user_agent'" + local -a errors=() - { run_url_check_cmd "$url" "$curl_out" "$curl_ua_args"; ret=$?; } || true + local -a user_agents=() - # A transitory error, or the URL is incorrect, - # but capture either way. - if [ "$ret" -ne 0 ]; then - echo "$url" >> "${invalid_file}" + # Test an unspecified UA (curl default) + user_agents+=('') - die "check failed for URL $url after $url_check_max_tries tries" - fi + # Test an explictly blank UA + user_agents+=('""') - local http_statuses + # Single space + user_agents+=(' ') - http_statuses=$(grep -E "^HTTP" "$curl_out" | awk '{print $2}' || true) - if [ -z "$http_statuses" ]; then - echo "$url" >> "${invalid_file}" - die "no HTTP status codes for URL $url" - fi + # CLI HTTP tools + user_agents+=('Wget') + user_agents+=('curl') - local status + # console based browsers + # Hopefully, these will always be supported for a11y. + user_agents+=('Lynx') + user_agents+=('Elinks') - for status in $http_statuses + # Emacs' w3m browser + user_agents+=('Emacs') + + # The full craziness + user_agents+=('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36') + + local user_agent + + # Cycle through the user agents until we find one that works. + # + # Note that we also test an unspecified user agent + # (no '-A '). + for user_agent in "${user_agents[@]}" do - # Ignore the following ranges of status codes: - # - # - 1xx: Informational codes. - # - 2xx: Success codes. - # - 3xx: Redirection codes. - # - 405: Specifically to handle some sites - # which get upset by "curl -L" when the - # redirection is not required. - # - # Anything else is considered an error. - # - # See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + info "Checking URL $url with User Agent '$user_agent'" - if ! echo "$status" | grep -qE "^(1[0-9][0-9]|2[0-9][0-9]|3[0-9][0-9]|405)"; then - echo "$url" >> "$invalid_file" - die "found HTTP error status codes for URL $url ($status)" + local curl_ua_args + [ -n "$user_agent" ] && curl_ua_args="-A '$user_agent'" + + { run_url_check_cmd "$url" "$curl_out" "$curl_ua_args"; ret=$?; } || true + + # A transitory error, or the URL is incorrect, + # but capture either way. + if [ "$ret" -ne 0 ]; then + errors+=("Failed to check URL '$url' (user agent: '$user_agent', return code $ret)") + + # Try again with another UA since it appears that some return codes + # indicate the server was unhappy with the details + # presented by the client. + continue fi + + local http_statuses + + http_statuses=$(grep -E "^HTTP" "$curl_out" |\ + awk '{print $2}' || true) + + if [ -z "$http_statuses" ]; then + errors+=("no HTTP status codes for URL '$url' (user agent: '$user_agent')") + + continue + fi + + local status + + local -i fail_count=0 + + # Check all HTTP status codes + for status in $http_statuses + do + # Ignore the following ranges of status codes: + # + # - 1xx: Informational codes. + # - 2xx: Success codes. + # - 3xx: Redirection codes. + # - 405: Specifically to handle some sites + # which get upset by "curl -L" when the + # redirection is not required. + # + # Anything else is considered an error. + # + # See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + + { grep -qE "^(1[0-9][0-9]|2[0-9][0-9]|3[0-9][0-9]|405)" <<< "$status"; ret=$?; } || true + + [ "$ret" -eq 0 ] && continue + + fail_count+=1 + done + + # If we didn't receive any unexpected HTTP status codes for + # this UA, the URL is valid so we don't need to check with any + # further UAs, so clear any (transitory) errors we've + # recorded. + [ "$fail_count" -eq 0 ] && errors=() && break + + echo "$url" >> "$invalid_file" + errors+=("found HTTP error status codes for URL $url (status: '$status', user agent: '$user_agent')") done + + [ "${#errors}" = 0 ] && return 0 + + die "failed to check URL '$url': errors: '${errors[*]}'" } # Perform basic checks on documentation files