Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-21 10:51:29 +00:00.

commit 837d3b4ca2 (parent 8f269d6df2)

drop scripts and files related to linkcheck

Signed-off-by: Davanum Srinivas <davanum@gmail.com>
@@ -1,192 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This tool extracts the links from types.go and .md files, visits each link,
// and checks the status code of the response.
// Usage:
// $ linkcheck --root-dir=${ROOT}

package main

import (
	"fmt"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/mvdan/xurls"
	flag "github.com/spf13/pflag"
)

var (
	rootDir    = flag.String("root-dir", "", "Root directory containing documents to be processed.")
	fileSuffix = flag.StringSlice("file-suffix", []string{"types.go", ".md"}, "suffix of files to be checked")

	// URLs matching the patterns in regWhiteList won't be checked. Patterns of
	// dummy URLs should be added to the list to avoid false alerts. Patterns of
	// URLs that we don't care about can also be added here to improve efficiency.
	regWhiteList = []*regexp.Regexp{
		regexp.MustCompile(`https://kubernetes-site\.appspot\.com`),
		// skip any URL that doesn't start with an English letter, e.g., URLs with IP addresses.
		regexp.MustCompile(`https?://[^A-Za-z].*`),
		regexp.MustCompile(`https?://localhost.*`),
	}
	// URLs listed in fullURLWhiteList won't be checked. This is kept separate
	// from regWhiteList to improve efficiency. The list includes dummy URLs that
	// are hard to generalize with a regex, and URLs that would cause false alerts.
	fullURLWhiteList = map[string]struct{}{
		"http://github.com/some/repo.git": {},
		// This URL returns 404 when visited by this tool, but it works fine if visited by a browser.
		"http://stackoverflow.com/questions/ask?tags=kubernetes":  {},
		"https://github.com/$YOUR_GITHUB_USERNAME/kubernetes.git": {},
		"https://github.com/$YOUR_GITHUB_USERNAME/kubernetes":     {},
		"http://storage.googleapis.com/kubernetes-release/release/v${K8S_VERSION}/bin/darwin/amd64/kubectl": {},
		// This server seems to expect a certain User-Agent value: it works fine from Chrome, but returns 404 for a plain cURL.
		"http://supervisord.org/":         {},
		"http://kubernetes.io/vX.Y/docs":  {},
		"http://kubernetes.io/vX.Y/docs/": {},
		"http://kubernetes.io/vX.Y/":      {},
	}

	visitedURLs    = map[string]struct{}{}
	htmlpreviewReg = regexp.MustCompile(`https://htmlpreview\.github\.io/\?`)
	httpOrhttpsReg = regexp.MustCompile(`https?.*`)
)

func newWalkFunc(invalidLink *bool, client *http.Client) filepath.WalkFunc {
	return func(filePath string, info os.FileInfo, initErr error) error {
		hasSuffix := false
		for _, suffix := range *fileSuffix {
			hasSuffix = hasSuffix || strings.HasSuffix(info.Name(), suffix)
		}
		if !hasSuffix {
			return nil
		}

		fileBytes, err := os.ReadFile(filePath)
		if err != nil {
			return err
		}
		foundInvalid := false
		allURLs := xurls.Strict.FindAll(fileBytes, -1)
		fmt.Fprintf(os.Stdout, "\nChecking file %s\n", filePath)
	URL:
		for _, URL := range allURLs {
			// Don't check non-http/https URLs.
			if !httpOrhttpsReg.Match(URL) {
				continue
			}
			for _, whiteURL := range regWhiteList {
				if whiteURL.Match(URL) {
					continue URL
				}
			}
			if _, found := fullURLWhiteList[string(URL)]; found {
				continue
			}
			// Remove the htmlpreview prefix.
			processedURL := htmlpreviewReg.ReplaceAll(URL, []byte{})

			// Skip URLs we have already visited.
			if _, found := visitedURLs[string(processedURL)]; found {
				continue
			}
			visitedURLs[string(processedURL)] = struct{}{}

			retry := 0
			const maxRetry int = 3
			backoff := 100
			for retry < maxRetry {
				fmt.Fprintf(os.Stdout, "Visiting %s\n", string(processedURL))
				// Use the HEAD verb to increase efficiency. However, some
				// servers do not handle HEAD well, so on a 404 we retry with a
				// GET to avoid a false alert.
				resp, err := client.Head(string(processedURL))
				// URLs with a mock host or mock port cause an error here. If we
				// reported that error, people would need to whitelist every mock
				// URL they add, which would be a maintenance nightmare. Hence we
				// only report 404s, catching the cases where host and port are
				// legit but the path is not, which is the most common mistake in
				// our docs.
				if err != nil {
					break
				}
				// The Retry-After header is used with 301, 429 and 503:
				// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After
				// The Go client follows redirects automatically, so the 301 case
				// is probably moot.
				if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable {
					retryAfter := resp.Header.Get("Retry-After")
					if seconds, err := strconv.Atoi(retryAfter); err == nil {
						backoff = seconds + 10
					}
					fmt.Fprintf(os.Stderr, "Got %d visiting %s, retry after %d seconds.\n", resp.StatusCode, string(URL), backoff)
					time.Sleep(time.Duration(backoff) * time.Second)
					backoff *= 2
					retry++
				} else if resp.StatusCode == http.StatusNotFound {
					// We only check for 404 for now; 401 and 403 are hard to handle.

					// Retry with a GET to avoid a false alert.
					resp, err = client.Get(string(processedURL))
					if err != nil {
						break
					}
					if resp.StatusCode != http.StatusNotFound {
						continue URL
					}

					foundInvalid = true
					fmt.Fprintf(os.Stderr, "Failed: in file %s, got %d visiting %s\n", filePath, resp.StatusCode, string(URL))
					break
				} else {
					break
				}
			}
			if retry == maxRetry {
				foundInvalid = true
				fmt.Fprintf(os.Stderr, "Failed: in file %s, still got 429 visiting %s after %d retries\n", filePath, string(URL), maxRetry)
			}
		}
		if foundInvalid {
			*invalidLink = true
		}
		return nil
	}
}

func main() {
	flag.Parse()

	if *rootDir == "" {
		flag.Usage()
		os.Exit(2)
	}
	client := http.Client{
		Timeout: time.Duration(5 * time.Second),
	}
	invalidLink := false
	if err := filepath.Walk(*rootDir, newWalkFunc(&invalidLink, &client)); err != nil {
		fmt.Fprintf(os.Stderr, "Fail: %v.\n", err)
		os.Exit(2)
	}
	if invalidLink {
		os.Exit(1)
	}
}
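Taken together, the heart of the removed tool is a two-step pattern: extract every URL in a file with xurls, probe it with HEAD, and confirm with GET before reporting a 404. Below is a minimal, self-contained sketch of just that skeleton, for readers skimming the removal. The file name and sample text are hypothetical, and the whitelists, visited-URL cache, and 429/503 Retry-After backoff of the real source above are deliberately omitted:

// linkprobe.go: a hypothetical, stripped-down illustration (not part of the repository).
package main

import (
	"fmt"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/mvdan/xurls"
)

func main() {
	// Sample input; the real tool reads each matching file under --root-dir.
	doc := []byte("Docs: https://kubernetes.io/docs/ or mailto:dev@kubernetes.io")

	client := &http.Client{Timeout: 5 * time.Second}
	for _, raw := range xurls.Strict.FindAll(doc, -1) {
		url := string(raw)
		// xurls.Strict also matches schemes like mailto:, hence the filter.
		if !strings.HasPrefix(url, "http") {
			continue
		}
		resp, err := client.Head(url)
		if err != nil {
			continue // unreachable (e.g., mock) hosts are ignored; only 404s get reported
		}
		resp.Body.Close()
		if resp.StatusCode != http.StatusNotFound {
			continue
		}
		// Some servers mishandle HEAD, so confirm with GET before reporting.
		if resp, err = client.Get(url); err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusNotFound {
				fmt.Fprintf(os.Stderr, "broken link: %s\n", url)
			}
		}
	}
}

The deleted source above layers the regex and full-URL whitelists, the visited-URL cache, and the Retry-After-driven retry loop on top of this same skeleton.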
@@ -32,7 +32,6 @@ source "${KUBE_ROOT}/third_party/forked/shell2junit/sh2ju.sh"
 # Excluded check patterns are always skipped.
 EXCLUDED_PATTERNS=(
   "verify-all.sh"                # this script calls the make rule and would cause a loop
-  "verify-linkcheck.sh"          # runs in separate Jenkins job once per day due to high network usage
   "verify-*-dockerized.sh"       # Don't run any scripts that intended to be run dockerized
   "verify-govet-levee.sh"        # Do not run levee analysis by default while KEP-1933 implementation is in alpha.
   "verify-licenses.sh"           # runs in a separate job to monitor availability of the dependencies periodically
@@ -1,66 +0,0 @@
#!/usr/bin/env bash

# Copyright 2014 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script extracts the links from types.go and .md files in the pkg/api/,
# pkg/apis/ and docs/ directories, checks the status code of each response, and
# outputs the list of invalid links.
# Usage: `hack/verify-linkcheck.sh`.

set -o errexit
set -o nounset
set -o pipefail

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
source "${KUBE_ROOT}/hack/lib/init.sh"

kube::golang::setup_env

make -C "${KUBE_ROOT}" WHAT=cmd/linkcheck

linkcheck=$(kube::util::find-binary "linkcheck")

kube::util::ensure-temp-dir
OUTPUT="${KUBE_TEMP}"/linkcheck-output
cleanup() {
  rm -rf "${OUTPUT}"
}
trap "cleanup" EXIT SIGINT
mkdir -p "$OUTPUT"

APIROOT="${KUBE_ROOT}/pkg/api/"
APISROOT="${KUBE_ROOT}/pkg/apis/"
DOCROOT="${KUBE_ROOT}/docs/"
ROOTS=("$APIROOT" "$APISROOT" "$DOCROOT")
found_invalid=false
for root in "${ROOTS[@]}"; do
  "${linkcheck}" "--root-dir=${root}" 2> >(tee -a "${OUTPUT}/error" >&2) && ret=0 || ret=$?
  if [[ $ret -eq 1 ]]; then
    echo "Failed: found invalid links in ${root}."
    found_invalid=true
  fi
  if [[ $ret -gt 1 ]]; then
    echo "Error running linkcheck"
    exit 1
  fi
done

if [ ${found_invalid} = true ]; then
  echo "Summary of invalid links:"
  cat "${OUTPUT}/error"
  exit 1
fi

# ex: ts=2 sw=2 et filetype=sh