Merge pull request #8592 from jodh-intel/static-checks-try-multiple-user-agents
CI: static-checks: Try multiple user agents
commit 2a35541af7
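The patch below splits the old check_url() into two parts: a run_url_check_cmd() helper that wraps the curl invocation, and a check_url() that retries each URL with a series of User-Agent values, failing only when every agent is rejected. As a rough standalone sketch of that strategy, assuming nothing beyond bash and curl (the helper name try_user_agents, the UA list, the 30-second timeout, and the example URL are illustrative, not from the patch):

    #!/usr/bin/env bash
    # Sketch of the retry-with-multiple-user-agents idea; 'try_user_agents'
    # is a hypothetical helper, not the patch's code.

    try_user_agents()
    {
        local url="$1"

        # From no UA at all up to CLI tool names, as the patch does.
        local -a user_agents=('' 'curl' 'Wget' 'Lynx')

        local ua
        for ua in "${user_agents[@]}"
        do
            local -a args=()
            [ -n "$ua" ] && args=(-A "$ua")

            # GET but read only the headers (-I): some servers reject HEAD.
            # '-f' fails on HTTP >= 400; the real patch parses the status
            # codes itself instead, so that it can also accept 405.
            if curl "${args[@]}" -fsIL -X GET --max-time 30 -o /dev/null "$url"
            then
                echo "URL '$url' OK with user agent '${ua:-<curl default>}'"
                return 0
            fi
        done

        echo >&2 "URL '$url' failed with every user agent"
        return 1
    }

    try_user_agents 'https://github.com/kata-containers/kata-containers'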
@@ -4,7 +4,7 @@ Kata Containers on Azure use nested virtualization to provide an identical insta
 experience to Kata on your preferred Linux distribution.
 
 This guide assumes you have an Azure account set up and tools to remotely login to your virtual
-machine (SSH). Instructions will use [Azure Portal](https://portal.azure.com/) to avoid
+machine (SSH). Instructions will use the Azure Portal to avoid
 local dependencies and setup.
 
 ## Create a new virtual machine with nesting support
@@ -492,50 +492,129 @@ EOF
 popd
 }
 
-check_url()
+run_url_check_cmd()
 {
-    local url="$1"
-    local invalid_urls_dir="$2"
+    local url="${1:-}"
+    [ -n "$url" ] || die "need URL"
 
-    local curl_out=$(mktemp)
-    files_to_remove+=("${curl_out}")
+    local out_file="${2:-}"
+    [ -n "$out_file" ] || die "need output file"
 
-    info "Checking URL $url"
+    # Can be blank
+    local extra_args="${3:-}"
 
-    # Process specific file to avoid out-of-order writes
-    local invalid_file=$(printf "%s/%d" "$invalid_urls_dir" "$$")
+    local curl_extra_args=()
 
-    local ret
-    local user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
+    curl_extra_args+=("$extra_args")
 
     # Authenticate for github to increase threshold for rate limiting
-    local curl_args=()
     if [[ "$url" =~ github\.com && -n "$GITHUB_USER" && -n "$GITHUB_TOKEN" ]]; then
-        curl_args+=("-u ${GITHUB_USER}:${GITHUB_TOKEN}")
+        curl_extra_args+=("-u ${GITHUB_USER}:${GITHUB_TOKEN}")
     fi
 
-    # Some endpoints return 403 to HEAD but 200 for GET, so perform a GET but only read headers.
-    { curl ${curl_args[*]} -sIL -X GET -c - -A "${user_agent}" -H "Accept-Encoding: zstd, none, gzip, deflate" --max-time "$url_check_timeout_secs" \
-        --retry "$url_check_max_tries" "$url" &>"$curl_out"; ret=$?; } || true
+    # Some endpoints return 403 to HEAD but 200 for GET,
+    # so perform a GET but only read headers.
+    curl \
+        ${curl_extra_args[*]} \
+        -sIL \
+        -X GET \
+        -c - \
+        -H "Accept-Encoding: zstd, none, gzip, deflate" \
+        --max-time "$url_check_timeout_secs" \
+        --retry "$url_check_max_tries" \
+        "$url" \
+        &>"$out_file"
+}
+
+check_url()
+{
+    local url="${1:-}"
+    [ -n "$url" ] || die "need URL to check"
+
+    local invalid_urls_dir="${2:-}"
+    [ -n "$invalid_urls_dir" ] || die "need invalid URLs directory"
+
+    local curl_out
+    curl_out=$(mktemp)
+
+    files_to_remove+=("${curl_out}")
+
+    # Process specific file to avoid out-of-order writes
+    local invalid_file
+    invalid_file=$(printf "%s/%d" "$invalid_urls_dir" "$$")
+
+    local ret
+
+    local -a errors=()
+
+    local -a user_agents=()
+
+    # Test an unspecified UA (curl default)
+    user_agents+=('')
+
+    # Test an explicitly blank UA
+    user_agents+=('""')
+
+    # Single space
+    user_agents+=(' ')
+
+    # CLI HTTP tools
+    user_agents+=('Wget')
+    user_agents+=('curl')
+
+    # console based browsers
+    # Hopefully, these will always be supported for a11y.
+    user_agents+=('Lynx')
+    user_agents+=('Elinks')
+
+    # Emacs' w3m browser
+    user_agents+=('Emacs')
+
+    # The full craziness
+    user_agents+=('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36')
+
+    local user_agent
+
+    # Cycle through the user agents until we find one that works.
+    #
+    # Note that we also test an unspecified user agent
+    # (no '-A <value>').
+    for user_agent in "${user_agents[@]}"
+    do
+        info "Checking URL $url with User Agent '$user_agent'"
+
+        local curl_ua_args
+        [ -n "$user_agent" ] && curl_ua_args="-A '$user_agent'"
+
+        { run_url_check_cmd "$url" "$curl_out" "$curl_ua_args"; ret=$?; } || true
 
         # A transitory error, or the URL is incorrect,
         # but capture either way.
         if [ "$ret" -ne 0 ]; then
-            echo "$url" >> "${invalid_file}"
+            errors+=("Failed to check URL '$url' (user agent: '$user_agent', return code $ret)")
 
-            die "check failed for URL $url after $url_check_max_tries tries"
+            # Try again with another UA since it appears that some return codes
+            # indicate the server was unhappy with the details
+            # presented by the client.
+            continue
         fi
 
         local http_statuses
 
-        http_statuses=$(grep -E "^HTTP" "$curl_out" | awk '{print $2}' || true)
+        http_statuses=$(grep -E "^HTTP" "$curl_out" |\
+            awk '{print $2}' || true)
 
         if [ -z "$http_statuses" ]; then
-            echo "$url" >> "${invalid_file}"
-            die "no HTTP status codes for URL $url"
+            errors+=("no HTTP status codes for URL '$url' (user agent: '$user_agent')")
+
+            continue
         fi
 
         local status
 
+        local -i fail_count=0
+
+        # Check all HTTP status codes
         for status in $http_statuses
         do
             # Ignore the following ranges of status codes:
@@ -551,11 +630,26 @@ check_url()
             #
             # See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
 
-            if ! echo "$status" | grep -qE "^(1[0-9][0-9]|2[0-9][0-9]|3[0-9][0-9]|405)"; then
-                echo "$url" >> "$invalid_file"
-                die "found HTTP error status codes for URL $url ($status)"
-            fi
+            { grep -qE "^(1[0-9][0-9]|2[0-9][0-9]|3[0-9][0-9]|405)" <<< "$status"; ret=$?; } || true
+
+            [ "$ret" -eq 0 ] && continue
+
+            fail_count+=1
         done
 
+        # If we didn't receive any unexpected HTTP status codes for
+        # this UA, the URL is valid so we don't need to check with any
+        # further UAs, so clear any (transitory) errors we've
+        # recorded.
+        [ "$fail_count" -eq 0 ] && errors=() && break
+
+        echo "$url" >> "$invalid_file"
+        errors+=("found HTTP error status codes for URL $url (status: '$status', user agent: '$user_agent')")
+    done
+
+    [ "${#errors}" = 0 ] && return 0
+
+    die "failed to check URL '$url': errors: '${errors[*]}'"
 }
 
 # Perform basic checks on documentation files
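For reference, the status filter in the hunk above accepts any 1xx, 2xx, or 3xx code, plus 405 (Method Not Allowed, which some servers return for the header-only GET probe), and counts anything else against fail_count. A quick runnable illustration of which codes pass, using sample status values of my own choosing rather than ones from the patch:

    # Sample codes, not from the patch: show what the filter accepts.
    for status in 200 204 301 307 403 404 405 500 503
    do
        grep -qE "^(1[0-9][0-9]|2[0-9][0-9]|3[0-9][0-9]|405)" <<< "$status" \
            && echo "$status: accepted" \
            || echo "$status: rejected"
    done
    # Prints accepted for 200 204 301 307 405; rejected for 403 404 500 503.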
@@ -647,6 +741,7 @@ static_check_docs()
     # is necessary to guarantee that all docs are referenced.
     md_docs_to_check="$all_docs"
 
+    command -v kata-check-markdown &>/dev/null ||\
         (cd "${test_dir}" && make -C cmd/check-markdown)
 
     command -v kata-check-markdown &>/dev/null || \
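This hunk makes building the markdown checker conditional: it is now only compiled when kata-check-markdown is not already on the PATH, and the subshell keeps the cd from leaking into the caller. A minimal sketch of the idiom, with placeholder tool and directory names:

    # Build 'tool' only if it is not already installed; the ( ... )
    # subshell confines the 'cd' so the caller's directory is unchanged.
    command -v tool &>/dev/null || \
        (cd "$src_dir" && make -C cmd/tool)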
@@ -810,7 +905,10 @@ static_check_docs()
 
     popd
 
-    [ $docs_failed -eq 0 ] || die "spell check failed, See https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#spelling for more information."
+    [ $docs_failed -eq 0 ] || {
+        url='https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#spelling'
+        die "spell check failed, See $url for more information."
+    }
 }
 
 static_check_eof()
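The final hunk trades one very long die line for an '|| { ... }' brace group so the URL can live in its own variable. The general shape of that pattern, as a sketch with echo and exit standing in for the script's die helper and some_check as a placeholder command:

    # Placeholder pattern: run the grouped commands only if the check fails.
    some_check || {
        url='https://example.com/why-this-failed'
        echo >&2 "check failed, see $url for more information"
        exit 1
    }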