From 837d3b4ca25c75c62fb9da9edfbe4d62a40d9c1d Mon Sep 17 00:00:00 2001
From: Davanum Srinivas
Date: Fri, 23 Sep 2022 13:55:45 -0400
Subject: [PATCH] drop scripts and files related to linkcheck

Signed-off-by: Davanum Srinivas
---
 cmd/linkcheck/links.go    | 192 --------------------------------------
 hack/make-rules/verify.sh |   1 -
 hack/verify-linkcheck.sh  |  66 -------------
 3 files changed, 259 deletions(-)
 delete mode 100644 cmd/linkcheck/links.go
 delete mode 100755 hack/verify-linkcheck.sh

diff --git a/cmd/linkcheck/links.go b/cmd/linkcheck/links.go
deleted file mode 100644
index 53122843377..00000000000
--- a/cmd/linkcheck/links.go
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
-Copyright 2015 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// This tool extracts the links from types.go and .md files, visits the link and
-// checks the status code of the response.
-// Usage:
-// $ linkcheck --root-dir=${ROOT}
-
-package main
-
-import (
- "fmt"
- "net/http"
- "os"
- "path/filepath"
- "regexp"
- "strconv"
- "strings"
- "time"
-
- "github.com/mvdan/xurls"
- flag "github.com/spf13/pflag"
-)
-
-var (
- rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")
- fileSuffix = flag.StringSlice("file-suffix", []string{"types.go", ".md"}, "suffix of files to be checked")
- // URLs matching the patterns in the regWhiteList won't be checked. Patterns
- // of dummy URLs should be added to the list to avoid false alerts. Also,
- // patterns of URLs that we don't care about can be added here to improve
- // efficiency.
- regWhiteList = []*regexp.Regexp{
- regexp.MustCompile(`https://kubernetes-site\.appspot\.com`),
- // skip url that doesn't start with an English alphabet, e.g., URLs with IP addresses.
- regexp.MustCompile(`https?://[^A-Za-z].*`),
- regexp.MustCompile(`https?://localhost.*`),
- }
- // URLs listed in the fullURLWhiteList won't be checked. This separated from
- // the RegWhiteList to improve efficiency. This list includes dummy URLs that
- // are hard to be generalized by a regex, and URLs that will cause false alerts.
- fullURLWhiteList = map[string]struct{}{
- "http://github.com/some/repo.git": {},
- // This URL returns 404 when visited by this tool, but it works fine if visited by a browser.
- "http://stackoverflow.com/questions/ask?tags=kubernetes": {},
- "https://github.com/$YOUR_GITHUB_USERNAME/kubernetes.git": {},
- "https://github.com/$YOUR_GITHUB_USERNAME/kubernetes": {},
- "http://storage.googleapis.com/kubernetes-release/release/v${K8S_VERSION}/bin/darwin/amd64/kubectl": {},
- // It seems this server expects certain User-Agent value, it works fine with Chrome, but returns 404 if we issue a plain cURL to it.
- "http://supervisord.org/": {}, - "http://kubernetes.io/vX.Y/docs": {}, - "http://kubernetes.io/vX.Y/docs/": {}, - "http://kubernetes.io/vX.Y/": {}, - } - - visitedURLs = map[string]struct{}{} - htmlpreviewReg = regexp.MustCompile(`https://htmlpreview\.github\.io/\?`) - httpOrhttpsReg = regexp.MustCompile(`https?.*`) -) - -func newWalkFunc(invalidLink *bool, client *http.Client) filepath.WalkFunc { - return func(filePath string, info os.FileInfo, initErr error) error { - hasSuffix := false - for _, suffix := range *fileSuffix { - hasSuffix = hasSuffix || strings.HasSuffix(info.Name(), suffix) - } - if !hasSuffix { - return nil - } - - fileBytes, err := os.ReadFile(filePath) - if err != nil { - return err - } - foundInvalid := false - allURLs := xurls.Strict.FindAll(fileBytes, -1) - fmt.Fprintf(os.Stdout, "\nChecking file %s\n", filePath) - URL: - for _, URL := range allURLs { - // Don't check non http/https URL - if !httpOrhttpsReg.Match(URL) { - continue - } - for _, whiteURL := range regWhiteList { - if whiteURL.Match(URL) { - continue URL - } - } - if _, found := fullURLWhiteList[string(URL)]; found { - continue - } - // remove the htmlpreview Prefix - processedURL := htmlpreviewReg.ReplaceAll(URL, []byte{}) - - // check if we have visited the URL. - if _, found := visitedURLs[string(processedURL)]; found { - continue - } - visitedURLs[string(processedURL)] = struct{}{} - - retry := 0 - const maxRetry int = 3 - backoff := 100 - for retry < maxRetry { - fmt.Fprintf(os.Stdout, "Visiting %s\n", string(processedURL)) - // Use verb HEAD to increase efficiency. However, some servers - // do not handle HEAD well, so we need to try a GET to avoid - // false alert. - resp, err := client.Head(string(processedURL)) - // URLs with mock host or mock port will cause error. If we report - // the error here, people need to add the mock URL to the white - // list every time they add a mock URL, which will be a maintenance - // nightmare. Hence, we decide to only report 404 to catch the - // cases where host and port are legit, but path is not, which - // is the most common mistake in our docs. - if err != nil { - break - } - // This header is used in 301, 429 and 503. - // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After - // And Go client will follow redirects automatically so the 301 check is probably unnecessary. - if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable { - retryAfter := resp.Header.Get("Retry-After") - if seconds, err := strconv.Atoi(retryAfter); err == nil { - backoff = seconds + 10 - } - fmt.Fprintf(os.Stderr, "Got %d visiting %s, retry after %d seconds.\n", resp.StatusCode, string(URL), backoff) - time.Sleep(time.Duration(backoff) * time.Second) - backoff *= 2 - retry++ - } else if resp.StatusCode == http.StatusNotFound { - // We only check for 404 error for now. 401, 403 errors are hard to handle. - - // We need to try a GET to avoid false alert. 
- resp, err = client.Get(string(processedURL)) - if err != nil { - break - } - if resp.StatusCode != http.StatusNotFound { - continue URL - } - - foundInvalid = true - fmt.Fprintf(os.Stderr, "Failed: in file %s, Got %d visiting %s\n", filePath, resp.StatusCode, string(URL)) - break - } else { - break - } - } - if retry == maxRetry { - foundInvalid = true - fmt.Fprintf(os.Stderr, "Failed: in file %s, still got 429 visiting %s after %d retries\n", filePath, string(URL), maxRetry) - } - } - if foundInvalid { - *invalidLink = true - } - return nil - } -} - -func main() { - flag.Parse() - - if *rootDir == "" { - flag.Usage() - os.Exit(2) - } - client := http.Client{ - Timeout: time.Duration(5 * time.Second), - } - invalidLink := false - if err := filepath.Walk(*rootDir, newWalkFunc(&invalidLink, &client)); err != nil { - fmt.Fprintf(os.Stderr, "Fail: %v.\n", err) - os.Exit(2) - } - if invalidLink { - os.Exit(1) - } -} diff --git a/hack/make-rules/verify.sh b/hack/make-rules/verify.sh index 7f56de372c9..db3d51c1c1f 100755 --- a/hack/make-rules/verify.sh +++ b/hack/make-rules/verify.sh @@ -32,7 +32,6 @@ source "${KUBE_ROOT}/third_party/forked/shell2junit/sh2ju.sh" # Excluded check patterns are always skipped. EXCLUDED_PATTERNS=( "verify-all.sh" # this script calls the make rule and would cause a loop - "verify-linkcheck.sh" # runs in separate Jenkins job once per day due to high network usage "verify-*-dockerized.sh" # Don't run any scripts that intended to be run dockerized "verify-govet-levee.sh" # Do not run levee analysis by default while KEP-1933 implementation is in alpha. "verify-licenses.sh" # runs in a separate job to monitor availability of the dependencies periodically diff --git a/hack/verify-linkcheck.sh b/hack/verify-linkcheck.sh deleted file mode 100755 index f4a2f032f85..00000000000 --- a/hack/verify-linkcheck.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2014 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script extracts the links from types.go and .md files in pkg/api/, -# pkg/apis/ and docs/ directories, checks the status code of the response, and -# output the list of invalid links. -# Usage: `hack/verify-linkcheck.sh`. - -set -o errexit -set -o nounset -set -o pipefail - -KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. -source "${KUBE_ROOT}/hack/lib/init.sh" - -kube::golang::setup_env - -make -C "${KUBE_ROOT}" WHAT=cmd/linkcheck - -linkcheck=$(kube::util::find-binary "linkcheck") - -kube::util::ensure-temp-dir -OUTPUT="${KUBE_TEMP}"/linkcheck-output -cleanup() { - rm -rf "${OUTPUT}" -} -trap "cleanup" EXIT SIGINT -mkdir -p "$OUTPUT" - -APIROOT="${KUBE_ROOT}/pkg/api/" -APISROOT="${KUBE_ROOT}/pkg/apis/" -DOCROOT="${KUBE_ROOT}/docs/" -ROOTS=("$APIROOT" "$APISROOT" "$DOCROOT") -found_invalid=false -for root in "${ROOTS[@]}"; do - "${linkcheck}" "--root-dir=${root}" 2> >(tee -a "${OUTPUT}/error" >&2) && ret=0 || ret=$? - if [[ $ret -eq 1 ]]; then - echo "Failed: found invalid links in ${root}." 
- found_invalid=true - fi - if [[ $ret -gt 1 ]]; then - echo "Error running linkcheck" - exit 1 - fi -done - -if [ ${found_invalid} = true ]; then - echo "Summary of invalid links:" - cat "${OUTPUT}/error" - exit 1 -fi - -# ex: ts=2 sw=2 et filetype=sh
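
Note (not part of the patch above): for reference, the per-URL strategy that the removed cmd/linkcheck/links.go implemented, a cheap HEAD request first, a GET double-check before reporting a 404, and exponential backoff driven by the Retry-After header on 429/503 responses, can be summarized in the standalone sketch below. The checkURL helper, the starting backoff, and the command-line interface are illustrative assumptions, not code taken verbatim from the removed file.

package main

import (
	"fmt"
	"net/http"
	"os"
	"strconv"
	"time"
)

// checkURL mirrors the removed tool's per-URL logic: HEAD first for speed,
// a GET double-check before reporting 404 (some servers mishandle HEAD),
// and backoff driven by Retry-After when the server answers 429 or 503.
func checkURL(client *http.Client, url string) error {
	backoff := 2 * time.Second // illustrative; the removed tool started at 100s
	for retry := 0; retry < 3; retry++ {
		resp, err := client.Head(url)
		if err != nil {
			// Transport-level errors are ignored, as in the removed tool, so
			// placeholder hosts and ports in docs do not raise false alerts.
			return nil
		}
		resp.Body.Close()
		switch resp.StatusCode {
		case http.StatusTooManyRequests, http.StatusServiceUnavailable:
			if s, err := strconv.Atoi(resp.Header.Get("Retry-After")); err == nil {
				backoff = time.Duration(s+10) * time.Second
			}
			time.Sleep(backoff)
			backoff *= 2
		case http.StatusNotFound:
			// Confirm with GET before flagging the link as broken.
			get, err := client.Get(url)
			if err != nil {
				return nil
			}
			get.Body.Close()
			if get.StatusCode == http.StatusNotFound {
				return fmt.Errorf("got 404 visiting %s", url)
			}
			return nil
		default:
			return nil
		}
	}
	return fmt.Errorf("still rate limited after retries: %s", url)
}

func main() {
	client := &http.Client{Timeout: 5 * time.Second}
	failed := false
	for _, u := range os.Args[1:] {
		if err := checkURL(client, u); err != nil {
			fmt.Fprintln(os.Stderr, "Failed:", err)
			failed = true
		}
	}
	if failed {
		os.Exit(1)
	}
}

As in the removed tool, only a confirmed 404 or persistent rate limiting fails the check; everything else is treated as reachable to keep false alerts (mock hosts, auth-gated pages) out of CI.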