Various munger changes

* Add analytics munger with munge heading
* More link autofixes
* Allow running a subset of munges
* Fix repo root detection
* Only process non-preformatted blocks
* Gendocs no longer adds the analytics link; mungedocs does that in a second pass.
2025-09-14 21:53:52 +00:00 · 2015-07-10 12:29:40 -07:00
parent a41f508451
commit bf77ecc3a9
7 changed files with 361 additions and 21 deletions
--- a/cmd/mungedocs/analytics.go
+++ b/cmd/mungedocs/analytics.go
@@ -0,0 +1,59 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"regexp"
+)
+
+var (
+	beginMungeExp = regexp.QuoteMeta(beginMungeTag("GENERATED_ANALYTICS"))
+	endMungeExp   = regexp.QuoteMeta(endMungeTag("GENERATED_ANALYTICS"))
+	analyticsExp  = regexp.QuoteMeta("[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/") +
+		"[^?]*" + // the per-file path: any run of characters other than '?'
+		regexp.QuoteMeta("?pixel)]()")
+
+	// analyticsRE matches, with adjacent newlines, either the bare analytics link or a begin tag, any text without '<', and the end tag.
+	analyticsRE = regexp.MustCompile(`[\n]*` + analyticsExp + `[\n]?` +
+		`|` + `[\n]*` + beginMungeExp + `[^<]*` + endMungeExp + `[\n]*`)
+)
+
+// checkAnalytics removes every analytics blurb (bare or munge-tagged) from fileBytes and appends one freshly tagged blurb; err is always nil.
+func checkAnalytics(fileName string, fileBytes []byte) (output []byte, err error) {
+	fileName = makeRepoRelative(fileName) // the link embeds the path with the repo-root prefix trimmed
+	desired := fmt.Sprintf(`
+
+
+`+beginMungeTag("GENERATED_ANALYTICS")+`
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/%s?pixel)]()
+`+endMungeTag("GENERATED_ANALYTICS")+`
+`, fileName)
+	if !analyticsRE.MatchString(desired) { // guard: never write a blurb that analyticsRE itself would not match
+		fmt.Printf("%q does not match %q", analyticsRE.String(), desired)
+		os.Exit(1)
+	}
+	// NOTE(review): matches are removed everywhere, ``` blocks included; a replaceNonPreformattedRegexp call was left disabled here - confirm intent.
+	output = analyticsRE.ReplaceAllFunc(fileBytes, func(in []byte) []byte {
+		return []byte{}
+	})
+	output = bytes.TrimRight(output, "\n") // desired starts with its own newlines, so drop the file's trailing ones
+	output = append(output, []byte(desired)...)
+	return output, nil
+}
--- a/cmd/mungedocs/analytics_test.go
+++ b/cmd/mungedocs/analytics_test.go
@@ -0,0 +1,85 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestAnalytics(t *testing.T) { // table-driven: checkAnalytics(in) must produce exactly out
+	var cases = []struct {
+		in  string // document before munging
+		out string // expected document after checkAnalytics
+	}{
+		{`aoeu`, `aoeu
+
+
+` + beginMungeTag("GENERATED_ANALYTICS") + `
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+` + endMungeTag("GENERATED_ANALYTICS") + `
+`}, // no blurb present: one is appended
+		{`aoeu
+
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+`, `aoeu
+
+
+` + beginMungeTag("GENERATED_ANALYTICS") + `
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+` + endMungeTag("GENERATED_ANALYTICS") + `
+`}, // bare link: replaced by the tagged form
+		{`aoeu
+
+` + beginMungeTag("GENERATED_ANALYTICS") + `
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+` + endMungeTag("GENERATED_ANALYTICS") + `
+`, `aoeu
+
+
+` + beginMungeTag("GENERATED_ANALYTICS") + `
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+` + endMungeTag("GENERATED_ANALYTICS") + `
+`}, // tagged blurb after a single blank line: spacing is normalized to two
+		{`aoeu
+
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+
+
+
+` + beginMungeTag("GENERATED_ANALYTICS") + `
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+` + endMungeTag("GENERATED_ANALYTICS") + `
+`, `aoeu
+
+
+` + beginMungeTag("GENERATED_ANALYTICS") + `
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/path/to/file-name.md?pixel)]()
+` + endMungeTag("GENERATED_ANALYTICS") + `
+`}, // bare and tagged copies together: collapsed into one tagged blurb
+	}
+	for _, c := range cases {
+		out, err := checkAnalytics("path/to/file-name.md", []byte(c.in))
+		assert.NoError(t, err)
+		if string(out) != c.out {
+			t.Errorf("Expected \n\n%v\n\n but got \n\n%v\n\n", c.out, string(out))
+		}
+	}
+}
--- a/cmd/mungedocs/links.go
+++ b/cmd/mungedocs/links.go
@@ -38,7 +38,7 @@ func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
 	dir := path.Dir(filePath)
 	errors := []string{}

-	output := linkRE.ReplaceAllFunc(fileBytes, func(in []byte) (out []byte) {
+	output := replaceNonPreformattedRegexp(fileBytes, linkRE, func(in []byte) (out []byte) {
 		match := linkRE.FindSubmatch(in)
 		// match[0] is the entire expression; [1] is the visible text and [2] is the link text.
 		visibleText := string(match[1])
@@ -108,13 +108,23 @@ func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
 	return output, err
 }

-func makeRepoRelative(path string) string {
-	parts := strings.Split(path, "github.com/GoogleCloudPlatform/kubernetes/")
-	if len(parts) > 1 {
-		// Take out anything that is specific to the local filesystem.
-		return parts[1]
+func makeRepoRelative(filePath string) string {
+	realRoot := path.Join(*rootDir, *repoRoot) + "/"
+	return strings.TrimPrefix(filePath, realRoot)
+}
+
+// cleanPath joins linkPath onto dirPath before calling path.Clean, so that redundant hops such as ../docs can be collapsed.
+// It returns the cleaned link relative to dirPath (printing "old -> new" when it changed), or linkPath as-is when the result is not under dirPath.
+func cleanPath(dirPath, linkPath string) string {
+	clean := path.Clean(path.Join(dirPath, linkPath))
+	if strings.HasPrefix(clean, dirPath+"/") {
+		out := strings.TrimPrefix(clean, dirPath+"/")
+		if out != linkPath {
+			fmt.Printf("%s -> %s\n", linkPath, out)
+		}
+		return out
 	}
-	return path
+	return linkPath
 }

 func checkPath(filePath, linkPath string) (newPath string, ok bool) {
@@ -126,18 +136,49 @@ func checkPath(filePath, linkPath string) (newPath string, ok bool) {
 			linkPath = linkPath[1:]
 		}
 	}
+	linkPath = cleanPath(dir, linkPath)

-	newPath = linkPath
-	for i := 0; i < 5; i++ {
-		// The file must exist.
-		target := path.Join(dir, newPath)
-		if info, err := os.Stat(target); err == nil {
-			if info.IsDir() {
-				return newPath + "/", true
-			}
-			return newPath, true
+	// Fast exit if the link is already correct.
+	if info, err := os.Stat(path.Join(dir, linkPath)); err == nil {
+		if info.IsDir() {
+			return linkPath + "/", true
+		}
+		return linkPath, true
+	}
+
+	for strings.HasPrefix(linkPath, "../") {
+		linkPath = strings.TrimPrefix(linkPath, "../")
+	}
+
+	// Fix - vs _ automatically
+	nameMungers := []func(string) string{
+		func(s string) string { return s },
+		func(s string) string { return strings.Replace(s, "-", "_", -1) },
+		func(s string) string { return strings.Replace(s, "_", "-", -1) },
+	}
+	// Fix being moved into/out of admin (replace "admin" with directory
+	// you're doing mass movements to/from).
+	pathMungers := []func(string) string{
+		func(s string) string { return s },
+		func(s string) string { return path.Join("admin", s) },
+		func(s string) string { return strings.TrimPrefix(s, "admin/") },
+	}
+
+	for _, namer := range nameMungers {
+		for _, pather := range pathMungers {
+			newPath = pather(namer(linkPath))
+			for i := 0; i < 7; i++ {
+				// The file must exist.
+				target := path.Join(dir, newPath)
+				if info, err := os.Stat(target); err == nil {
+					if info.IsDir() {
+						return newPath + "/", true
+					}
+					return newPath, true
+				}
+				newPath = path.Join("..", newPath)
+			}
 		}
-		newPath = path.Join("..", newPath)
 	}
 	return linkPath, false
 }
--- a/cmd/mungedocs/mungedocs.go
+++ b/cmd/mungedocs/mungedocs.go
@@ -30,8 +30,15 @@ import (
 )

 var (
-	verify  = flag.Bool("verify", false, "Exit with status 1 if files would have needed changes but do not change.")
-	rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")
+	verify   = flag.Bool("verify", false, "Exit with status 1 if files would have needed changes but do not change.")
+	rootDir  = flag.String("root-dir", "", "Root directory containing documents to be processed.")
+	repoRoot = flag.String("repo-root", "..", `Appended to --root-dir to get the repository root.
+It's done this way so that generally you just have to set --root-dir.
+Examples:
+ * --root-dir=docs/ --repo-root=.. means the repository root is ./
+ * --root-dir=/usr/local/long/path/repo/docs/ --repo-root=.. means the repository root is /usr/local/long/path/repo/
+ * --root-dir=/usr/local/long/path/repo/docs/admin --repo-root=../.. means the repository root is /usr/local/long/path/repo/`)
+	skipMunges = flag.String("skip-munges", "", "Comma-separated list of munges to *not* run. Available munges are: "+availableMungeList)

 	ErrChangesNeeded = errors.New("mungedocs: changes required")

@@ -41,7 +48,15 @@ var (
 		{"table-of-contents", updateTOC},
 		{"check-links", checkLinks},
 		{"unversioned-warning", updateUnversionedWarning},
+		{"analytics", checkAnalytics},
 	}
+	availableMungeList = func() string {
+		names := []string{}
+		for _, m := range allMunges {
+			names = append(names, m.name)
+		}
+		return strings.Join(names, ",")
+	}()
 )

 // a munge processes a document, returning an updated document xor an error.
@@ -121,6 +136,30 @@ func newWalkFunc(fp *fileProcessor, changesNeeded *bool) filepath.WalkFunc {
 	}
 }

+func wantedMunges() (filtered []munge) { // returns allMunges minus the names listed in --skip-munges
+	skipList := strings.Split(*skipMunges, ",")
+	skipped := map[string]bool{}
+	for _, m := range skipList {
+		if len(m) > 0 { // Split yields "" for an empty flag value or stray commas
+			skipped[m] = true
+		}
+	}
+	for _, m := range allMunges {
+		if !skipped[m.name] {
+			filtered = append(filtered, m)
+		} else {
+			// A recognized name is deleted from the map, so anything still
+			// in it after this loop is not the name of any known munge.
+			delete(skipped, m.name)
+		}
+	}
+	if len(skipped) != 0 {
+		fmt.Fprintf(os.Stderr, "ERROR: requested to skip %v, but these are not valid munges. (valid: %v)\n", skipped, availableMungeList)
+		os.Exit(1)
+	}
+	return filtered
+}
+
 func main() {
 	flag.Parse()

@@ -139,7 +178,7 @@ func main() {
 	}

 	fp := fileProcessor{
-		munges:     allMunges,
+		munges:     wantedMunges(),
 		verifyOnly: *verify,
 	}

--- a/cmd/mungedocs/util.go
+++ b/cmd/mungedocs/util.go
@@ -19,9 +19,15 @@ package main
 import (
 	"bytes"
 	"fmt"
+	"regexp"
 	"strings"
 )

+var (
+	// preformatRE matches text that begins with ```; it is applied one line at a time to spot the lines that open or close a preformatted block.
+	preformatRE = regexp.MustCompile("^```.*")
+)
+
 // Splits a document up into a slice of lines.
 func splitLines(document []byte) []string {
 	lines := strings.Split(string(document), "\n")
@@ -110,3 +116,44 @@ func beginMungeTag(desc string) string {
 func endMungeTag(desc string) string {
 	return fmt.Sprintf("<!-- END MUNGE: %s -->", desc)
 }
+
+// replaceNonPreformatted cuts input into chunks at lines starting with ```, passes each chunk outside a fenced block through 'replace',
+// copies fenced chunks (fence lines included) verbatim, and returns the concatenation, so 'replace' never sees preformatted text.
+func replaceNonPreformatted(input []byte, replace func([]byte) []byte) []byte {
+	output := []byte(nil)
+	cur := []byte(nil)
+	keepBlock := true // true while the current chunk is outside a ``` block
+	// bytes.SplitAfter leaves each "\n" attached to its line, so appending
+	// the pieces back together reproduces input with or without a final
+	// newline. NOTE(review): said to be Unicode safe per its docs - confirm.
+	for _, line := range bytes.SplitAfter(input, []byte("\n")) {
+		if keepBlock {
+			if preformatRE.Match(line) { // opening fence: flush the munged text that preceded it
+				cur = replace(cur)
+				output = append(output, cur...)
+				cur = []byte{}
+				keepBlock = false
+			}
+			cur = append(cur, line...)
+		} else {
+			cur = append(cur, line...)
+			if preformatRE.Match(line) { // closing fence: emit the fenced chunk untouched
+				output = append(output, cur...)
+				cur = []byte{}
+				keepBlock = true
+			}
+		}
+	}
+	if keepBlock { // input ended outside a fence: the tail still goes through replace
+		cur = replace(cur)
+	}
+	output = append(output, cur...)
+	return output
+}
+
+// replaceNonPreformattedRegexp applies exp.ReplaceAllFunc with 'replace' to each section of input outside ``` blocks, via replaceNonPreformatted.
+func replaceNonPreformattedRegexp(input []byte, exp *regexp.Regexp, replace func([]byte) []byte) []byte {
+	return replaceNonPreformatted(input, func(in []byte) []byte {
+		return exp.ReplaceAllFunc(in, replace)
+	})
+}
--- a/cmd/mungedocs/util_test.go
+++ b/cmd/mungedocs/util_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package main

 import (
+	"reflect"
 	"testing"

 	"github.com/stretchr/testify/assert"
@@ -108,3 +109,71 @@ func TestHasMacroBlock(t *testing.T) {
 		}
 	}
 }
+
+func TestReplaceNonPreformatted(t *testing.T) { // replace discards its input, so only fenced text should survive
+	cases := []struct {
+		in  string
+		out string
+	}{
+		{"aoeu", ""},
+		{"aoeu\n```\naoeu\n```\naoeu", "```\naoeu\n```\n"},
+		{"ao\neu\n```\naoeu\n\n\n", "```\naoeu\n\n\n"}, // unterminated fence: kept through end of input
+		{"aoeu ```aoeu``` aoeu", ""}, // ``` in the middle of a line is not a fence
+	}
+
+	for i, c := range cases {
+		out := string(replaceNonPreformatted([]byte(c.in), func([]byte) []byte { return nil }))
+		if out != c.out {
+			t.Errorf("%v: got %q, wanted %q", i, out, c.out)
+		}
+	}
+}
+
+func TestReplaceNonPreformattedNoChange(t *testing.T) { // an identity replace must reproduce the input byte for byte
+	cases := []struct {
+		in string
+	}{
+		{"aoeu"},
+		{"aoeu\n```\naoeu\n```\naoeu"},
+		{"aoeu\n\n```\n\naoeu\n\n```\n\naoeu"},
+		{"ao\neu\n```\naoeu\n\n\n"},
+		{"aoeu ```aoeu``` aoeu"},
+		{"aoeu\n```\naoeu\n```"},
+		{"aoeu\n```\naoeu\n```\n"},
+		{"aoeu\n```\naoeu\n```\n\n"},
+	}
+
+	for i, c := range cases {
+		out := string(replaceNonPreformatted([]byte(c.in), func(in []byte) []byte { return in }))
+		if out != c.in {
+			t.Errorf("%v: got %q, wanted %q", i, out, c.in)
+		}
+	}
+}
+
+func TestReplaceNonPreformattedCallOrder(t *testing.T) { // checks which chunks reach replace, and in what order
+	cases := []struct {
+		in     string
+		expect []string // arguments replace should receive, in call order
+	}{
+		{"aoeu", []string{"aoeu"}},
+		{"aoeu\n```\naoeu\n```\naoeu", []string{"aoeu\n", "aoeu"}},
+		{"aoeu\n\n```\n\naoeu\n\n```\n\naoeu", []string{"aoeu\n\n", "\naoeu"}},
+		{"ao\neu\n```\naoeu\n\n\n", []string{"ao\neu\n"}}, // input ends inside the fence: no final call
+		{"aoeu ```aoeu``` aoeu", []string{"aoeu ```aoeu``` aoeu"}},
+		{"aoeu\n```\naoeu\n```", []string{"aoeu\n", ""}}, // replace is still called with the empty tail
+		{"aoeu\n```\naoeu\n```\n", []string{"aoeu\n", ""}},
+		{"aoeu\n```\naoeu\n```\n\n", []string{"aoeu\n", "\n"}},
+	}
+
+	for i, c := range cases {
+		got := []string{}
+		replaceNonPreformatted([]byte(c.in), func(in []byte) []byte {
+			got = append(got, string(in))
+			return in
+		})
+		if e, a := c.expect, got; !reflect.DeepEqual(e, a) {
+			t.Errorf("%v: got %q, wanted %q", i, a, e)
+		}
+	}
+}
--- a/hack/lib/util.sh
+++ b/hack/lib/util.sh
@@ -149,7 +149,7 @@ kube::util::gen-doc() {
      local link path
      path="${relative_doc_dest}/${file}"
      link=$(kube::util::analytics-link "${path}")
-      echo -e "\n${link}" >> "${tmpdir}/${file}"
+      echo -e "\n\n${link}" >> "${tmpdir}/${file}"
    fi
    # Remove all old generated files from the destination
    if [[ -e "${tmpdir}/${file}" ]]; then