Various munger changes

* Add analytics munger w/ munge heading * More link autofixes * Allow running a subset of munges * Fix repo root detection * Only process non-preformatted blocks * Gendocs no longer adds the analytics link; mungedocs does that in a second pass.
2026-01-06 16:06:51 +00:00 · 2015-07-10 12:29:40 -07:00
parent a41f508451
commit bf77ecc3a9
7 changed files with 361 additions and 21 deletions
--- a/cmd/mungedocs/util.go
+++ b/cmd/mungedocs/util.go
@@ -19,9 +19,15 @@ package main
 import (
 	"bytes"
 	"fmt"
+	"regexp"
 	"strings"
 )

+var (
+	// preformatRE matches a line beginning with ``` (the fence that opens or closes a preformatted block).
+	preformatRE = regexp.MustCompile("^```.*") // NOTE(review): '^' means an indented fence does not match; confirm intended
+)
+
 // Splits a document up into a slice of lines.
 func splitLines(document []byte) []string {
 	lines := strings.Split(string(document), "\n")
@@ -110,3 +116,44 @@ func beginMungeTag(desc string) string {
 func endMungeTag(desc string) string {
 	return fmt.Sprintf("<!-- END MUNGE: %s -->", desc)
 }
+
+// replaceNonPreformatted applies 'replace' to each run of lines outside ``` fenced blocks and
+// returns the reassembled document; fence lines and everything between them are copied verbatim.
+func replaceNonPreformatted(input []byte, replace func([]byte) []byte) []byte {
+	output := []byte(nil)
+	cur := []byte(nil)
+	keepBlock := true
+	// bytes.SplitAfter leaves the "\n" on every piece, so concatenating the pieces
+	// reproduces the input exactly, even when the last line has no trailing newline.
+	// keepBlock is true while outside a fence; cur buffers the run being collected.
+	for _, line := range bytes.SplitAfter(input, []byte("\n")) {
+		if keepBlock {
+			if preformatRE.Match(line) { // opening fence: rewrite and flush the (possibly empty) text before it
+				cur = replace(cur)
+				output = append(output, cur...)
+				cur = []byte{}
+				keepBlock = false
+			}
+			cur = append(cur, line...)
+		} else {
+			cur = append(cur, line...)
+			if preformatRE.Match(line) { // closing fence: flush the whole block untouched
+				output = append(output, cur...)
+				cur = []byte{}
+				keepBlock = true
+			}
+		}
+	}
+	if keepBlock { // trailing text outside any fence still needs rewriting
+		cur = replace(cur)
+	}
+	output = append(output, cur...) // an unterminated fence ends up here and is emitted unmodified
+	return output
+}
+
+// replaceNonPreformattedRegexp rewrites only the matches of exp that lie outside ``` blocks.
+func replaceNonPreformattedRegexp(input []byte, exp *regexp.Regexp, replace func([]byte) []byte) []byte {
+	return replaceNonPreformatted(input, func(in []byte) []byte {
+		return exp.ReplaceAllFunc(in, replace) // each match of exp is swapped for replace(match)
+	})
+}