diff --git a/tests/cmd/check-markdown/Makefile b/tests/cmd/check-markdown/Makefile new file mode 100644 index 0000000000..bb35e6ca6a --- /dev/null +++ b/tests/cmd/check-markdown/Makefile @@ -0,0 +1,32 @@ +# +# Copyright (c) 2017-2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +TARGET = kata-check-markdown +SOURCES = $(shell find . -type f 2>&1 | grep -E '.*\.go$$') + +VERSION := ${shell cat ./VERSION} +COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true) +COMMIT := $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}") + +BINDIR := $(GOPATH)/bin +DESTTARGET := $(abspath $(BINDIR)/$(TARGET)) + +default: install + +check: $(SOURCES) + go test -v ./... + +$(TARGET): $(SOURCES) + go build -o "$(TARGET)" -ldflags "-X main.name=${TARGET} -X main.commit=${COMMIT} -X main.version=${VERSION}" . + +install: $(TARGET) + install -d $(shell dirname $(DESTTARGET)) + install $(TARGET) $(DESTTARGET) + +clean: + rm -f $(TARGET) + +.PHONY: install clean diff --git a/tests/cmd/check-markdown/README.md b/tests/cmd/check-markdown/README.md new file mode 100644 index 0000000000..c8e3e4fc8d --- /dev/null +++ b/tests/cmd/check-markdown/README.md @@ -0,0 +1,57 @@ +# Overview + +The Kata Project comprises +[a number of GitHub repositories](https://github.com/kata-containers). +All these repositories contain documents written in +[GitHub-Flavoured Markdown](https://github.github.com/gfm) +format. + +[Linking in documents is strongly encouraged](https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md) +but due to the number of internal and external document links, it is easy for +mistakes to be made. Also, links can become stale when one document is updated +but the documents it depends on are not. + +# Tool summary + +The `kata-check-markdown` tool checks a markdown document to ensure all links +within it are valid. 
All internal links are checked and by default all +external links are also checked. The tool is able to suggest corrections for +some errors it finds. It can also generate a TOC (table of contents). + +# Usage + +## Basic + +```sh +$ kata-check-markdown check README.md +``` + +## Generate a TOC + +```sh +$ kata-check-markdown toc README.md +``` + +## List headings + +To list the document headings in the default `text` format: + +```sh +$ kata-check-markdown list headings README.md +``` + +## List links + +To list the links in a document in tab-separated format: + +```sh +$ kata-check-markdown list links --format tsv README.md +``` + +## Full details + +Lists all available options: + +```sh +$ kata-check-markdown -h +``` diff --git a/tests/cmd/check-markdown/VERSION b/tests/cmd/check-markdown/VERSION new file mode 100644 index 0000000000..8acdd82b76 --- /dev/null +++ b/tests/cmd/check-markdown/VERSION @@ -0,0 +1 @@ +0.0.1 diff --git a/tests/cmd/check-markdown/add.go b/tests/cmd/check-markdown/add.go new file mode 100644 index 0000000000..182b3593b0 --- /dev/null +++ b/tests/cmd/check-markdown/add.go @@ -0,0 +1,135 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "errors" + "fmt" + "path/filepath" + "strings" + + "github.com/sirupsen/logrus" +) + +// linkAddrToPath converts a link address into a path name. 
+func (d *Doc) linkAddrToPath(address string) (string, error) { + if address == "" { + return "", errors.New("need address") + } + + dir := filepath.Dir(d.Name) + + var file string + + // An "absolute link path" like this has been specified: + // + // [Foo](/absolute-link.md) + if strings.HasPrefix(address, absoluteLinkPrefix) { + if !fileExists(docRoot) { + return "", fmt.Errorf("document root %q does not exist", docRoot) + } + + file = filepath.Join(docRoot, address) + } else { + file = filepath.Join(dir, address) + } + + return file, nil +} + +// addHeading adds the specified heading to the document. +// +// Note that headings must be unique. +func (d *Doc) addHeading(heading Heading) error { + name := heading.Name + + if name == "" { + return d.Errorf("heading name cannot be blank: %+v", heading) + } + + if heading.LinkName == "" { + return d.Errorf("heading link name cannot be blank: %q (%+v)", + name, heading) + } + + if heading.Level <= 0 { + return d.Errorf("heading level must be atleast 1: %q (%+v)", + name, heading) + } + + if _, ok := d.Headings[name]; ok { + return d.Errorf("duplicate heading: %q (heading: %+v)", + name, heading) + } + + // Potentially change the ID to handle strange characters + // supported in links by GitHub. + id, err := createHeadingID(heading.Name) + if err != nil { + return err + } + + heading.LinkName = id + + d.Logger.WithField("heading", fmt.Sprintf("%+v", heading)).Debug("adding heading") + + d.Headings[name] = heading + + return nil +} + +// addLink potentially adds the specified link to the document. +// +// Note that links do not need to be unique: a document can contain +// multiple links with: +// +// - the same description and the same address. +// - the same description but with different addresses. +// - different descriptions but with the same address. 
+func (d *Doc) addLink(link Link) error { + addr := link.Address + + if link.ResolvedPath != "" { + addr = link.ResolvedPath + } + + if addr == "" { + return d.Errorf("link address cannot be blank: %+v", link) + } + + if link.Type == unknownLink { + return d.Errorf("BUG: link type invalid: %+v", link) + } + + // Not checked by default as magic "build status" / go report / godoc + // links don't have a description - they have a image only. + if strict && link.Description == "" { + return d.Errorf("link description cannot be blank: %q (%+v)", + addr, link) + } + + fields := logrus.Fields{ + "link": fmt.Sprintf("%+v", link), + } + + links := d.Links[addr] + + for _, l := range links { + if l.Type == link.Type { + d.Logger.WithFields(fields).Debug("not adding duplicate link") + + return nil + } + } + + d.Logger.WithFields(fields).Debug("adding link") + + links = append(links, link) + d.Links[addr] = links + + return nil +} diff --git a/tests/cmd/check-markdown/add_test.go b/tests/cmd/check-markdown/add_test.go new file mode 100644 index 0000000000..3e5866a0ae --- /dev/null +++ b/tests/cmd/check-markdown/add_test.go @@ -0,0 +1,191 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +const ( + testFileMode = os.FileMode(0640) + testDirMode = os.FileMode(0750) + readmeName = "README.md" +) + +func createFile(file, contents string) error { + return os.WriteFile(file, []byte(contents), testFileMode) +} + +// makeDirs creates two directories below the specified base directory: one is +// an empty director named emptyDirName and the other is named readmeDirName +// and contains a markdown file called "README.md". 
+func makeDirs(assert *assert.Assertions, baseDir string, readmeDirName, emptyDirName string) { + readmeDir := filepath.Join(baseDir, readmeDirName) + err := os.MkdirAll(readmeDir, testDirMode) + assert.NoError(err) + + readme := filepath.Join(readmeDir, "README.md") + + err = createFile(readme, "# hello") + assert.NoError(err) + + emptyDir := filepath.Join(baseDir, emptyDirName) + err = os.MkdirAll(emptyDir, testDirMode) + assert.NoError(err) +} + +func TestDocAddHeading(t *testing.T) { + assert := assert.New(t) + + type testData struct { + heading Heading + expectError bool + } + + data := []testData{ + {Heading{"", "", "", -1}, true}, + {Heading{"Foo", "", "", -1}, true}, + {Heading{"Foo", "", "", 0}, true}, + {Heading{"Foo", "", "", 1}, true}, + {Heading{"Foo", "", "foo", -1}, true}, + {Heading{"Foo", "", "foo", 0}, true}, + + {Heading{"Foo", "", "foo", 1}, false}, + {Heading{"`Foo`", "`Foo`", "foo", 1}, false}, + } + + logger := logrus.WithField("test", "true") + + for i, d := range data { + doc := newDoc("foo", logger) + + assert.Empty(doc.Headings) + + msg := fmt.Sprintf("test[%d]: %+v\n", i, d) + + err := doc.addHeading(d.heading) + if d.expectError { + assert.Error(err, msg) + continue + } + + assert.NoError(err, msg) + assert.NotEmpty(doc.Headings, msg) + + name := d.heading.Name + + result, ok := doc.Headings[name] + assert.True(ok, msg) + + assert.Equal(d.heading, result, msg) + } +} + +func TestDocAddLink(t *testing.T) { + assert := assert.New(t) + + type testData struct { + link Link + expectError bool + } + + data := []testData{ + {Link{nil, "", "", "", -1}, true}, + {Link{nil, "foo", "", "", unknownLink}, true}, + + {Link{nil, "foo", "", "", internalLink}, false}, + {Link{nil, "http://google.com", "", "", urlLink}, false}, + {Link{nil, "https://google.com", "", "", urlLink}, false}, + {Link{nil, "mailto:me@somewhere.com", "", "", mailLink}, false}, + } + + logger := logrus.WithField("test", "true") + + for i, d := range data { + doc := newDoc("foo", 
logger) + + assert.Empty(doc.Links) + + msg := fmt.Sprintf("test[%d]: %+v\n", i, d) + + err := doc.addLink(d.link) + if d.expectError { + assert.Error(err, msg) + continue + } + + assert.NoError(err, msg) + assert.NotEmpty(doc.Links, msg) + addr := d.link.Address + + result := doc.Links[addr][0] + assert.Equal(result, d.link) + } +} + +func TestDocLinkAddrToPath(t *testing.T) { + assert := assert.New(t) + + dir, err := os.MkdirTemp("", "") + assert.NoError(err) + + cwd, err := os.Getwd() + assert.NoError(err) + defer os.Chdir(cwd) + + err = os.Chdir(dir) + assert.NoError(err) + defer os.RemoveAll(dir) + + savedDocRoot := docRoot + docRoot = dir + + defer func() { + docRoot = savedDocRoot + + }() + + mdFile := "bar.md" + mdPath := filepath.Join("/", mdFile) + actualMDPath := filepath.Join(dir, mdFile) + + type testData struct { + linkAddr string + expectedPath string + expectError bool + } + + data := []testData{ + {"", "", true}, + {"bar", "bar", false}, + {"bar.md", "bar.md", false}, + {mdPath, actualMDPath, false}, + } + + logger := logrus.WithField("test", "true") + doc := newDoc("foo", logger) + + for i, d := range data { + msg := fmt.Sprintf("test[%d]: %+v\n", i, d) + + result, err := doc.linkAddrToPath(d.linkAddr) + + if d.expectError { + assert.Error(err, msg) + continue + } + + assert.NoError(err, msg) + assert.Equal(d.expectedPath, result) + } +} diff --git a/tests/cmd/check-markdown/check.go b/tests/cmd/check-markdown/check.go new file mode 100644 index 0000000000..1bc038f88b --- /dev/null +++ b/tests/cmd/check-markdown/check.go @@ -0,0 +1,118 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "errors" + "fmt" +) + +// checkLink checks the validity of the specified link. If checkOtherDoc is +// true and the link is an external one, validate the link by considering the +// external document too. 
+func (d *Doc) checkLink(address string, link Link, checkOtherDoc bool) error { + if address == "" { + return errors.New("link address not set") + } + + switch link.Type { + case externalFile: + fallthrough + case externalLink: + // Check to ensure that referenced file actually exists + + var file string + + if link.ResolvedPath != "" { + file = link.ResolvedPath + } else { + file, _, err := splitLink(address) + if err != nil { + return err + } + + file, err = d.linkAddrToPath(file) + if err != nil { + return err + } + + if !fileExists(file) { + return d.Errorf("link type %v invalid: %q does not exist", + link.Type, + file) + } + } + + if link.Type == externalFile { + break + } + + // Check the other document + other, err := getDoc(file, d.Logger) + if err != nil { + return err + } + + if !checkOtherDoc { + break + } + + _, section, err := splitLink(address) + if err != nil { + return err + } + + if section == "" { + break + } + + if !other.hasHeading(section) { + return other.Errorf("invalid link %v", address) + } + + case internalLink: + // must be a link to an existing heading + + // search for a heading whose LinkName == name + found := d.headingByLinkName(address) + if found == nil { + msg := fmt.Sprintf("failed to find heading for link %q (%+v)", address, link) + + // There is a chance the link description matches the + // correct heading the link address refers to. In + // which case, we can derive the correct link address! + suggestion, err2 := createHeadingID(link.Description) + + if err2 == nil && suggestion != link.Address { + found = d.headingByLinkName(suggestion) + if found != nil { + msg = fmt.Sprintf("%s - correct link name is %q", msg, suggestion) + } + } + + return d.Errorf("%s", msg) + } + case urlLink: + // NOP - handled by xurls + } + + return nil +} + +// check performs all checks on the document. 
+func (d *Doc) check() error { + for name, linkList := range d.Links { + for _, link := range linkList { + err := d.checkLink(name, link, false) + if err != nil { + return err + } + } + } + + return nil +} diff --git a/tests/cmd/check-markdown/display.go b/tests/cmd/check-markdown/display.go new file mode 100644 index 0000000000..a6d2f7f1d0 --- /dev/null +++ b/tests/cmd/check-markdown/display.go @@ -0,0 +1,102 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "fmt" + "os" + "sort" + + "github.com/sirupsen/logrus" +) + +var outputFile = os.Stdout + +// displayHandler is an interface that all output display handlers +// (formatters) must implement. +type DisplayHandler interface { + DisplayHeadings(d *Doc) error + DisplayLinks(d *Doc) error +} + +// DisplayHandlers encapsulates the list of available display handlers. +type DisplayHandlers struct { + handlers map[string]DisplayHandler +} + +// handlers is a map of the available output format display handling +// implementations. +var handlers map[string]DisplayHandler + +// NewDisplayHandlers create a new DisplayHandler. +func NewDisplayHandlers(tsvSeparator string, disableHeader bool) *DisplayHandlers { + separator := rune('\t') + + if tsvSeparator != "" { + separator = rune(tsvSeparator[0]) + } + + if handlers == nil { + handlers = make(map[string]DisplayHandler) + + handlers[textFormat] = NewDisplayText(outputFile) + handlers[tsvFormat] = NewDisplayTSV(outputFile, separator, disableHeader) + } + + h := &DisplayHandlers{ + handlers: handlers, + } + + return h +} + +// find looks for a display handler corresponding to the specified format +func (d *DisplayHandlers) find(format string) DisplayHandler { + for f, handler := range d.handlers { + if f == format { + return handler + } + } + + return nil +} + +// Get returns a list of the available formatters (display handler names). 
+func (d *DisplayHandlers) Get() []string { + var formats []string + + for f := range d.handlers { + formats = append(formats, f) + } + + sort.Strings(formats) + + return formats +} + +func show(inputFilename string, logger *logrus.Entry, handler DisplayHandler, what DataToShow) error { + var fn func(*Doc) error + + switch what { + case showHeadings: + fn = handler.DisplayHeadings + case showLinks: + fn = handler.DisplayLinks + default: + return fmt.Errorf("unknown show option: %v", what) + } + + doc := newDoc(inputFilename, logger) + doc.ListMode = true + + err := doc.parse() + if err != nil { + return err + } + + return fn(doc) +} diff --git a/tests/cmd/check-markdown/display_text.go b/tests/cmd/check-markdown/display_text.go new file mode 100644 index 0000000000..e7e5f87b1e --- /dev/null +++ b/tests/cmd/check-markdown/display_text.go @@ -0,0 +1,57 @@ +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "fmt" + "os" +) + +type displayText struct { + file *os.File +} + +func NewDisplayText(file *os.File) DisplayHandler { + return &displayText{ + file: file, + } +} + +func (d *displayText) DisplayLinks(doc *Doc) error { + for _, linkList := range doc.Links { + for _, link := range linkList { + err := d.displayLink(link) + if err != nil { + return err + } + } + } + + return nil +} + +func (d *displayText) displayLink(l Link) error { + _, err := fmt.Fprintf(d.file, "%+v\n", l) + + return err +} + +func (d *displayText) DisplayHeadings(doc *Doc) error { + for _, h := range doc.Headings { + err := d.displayHeading(h) + if err != nil { + return err + } + } + + return nil +} + +func (d *displayText) displayHeading(h Heading) error { + _, err := fmt.Fprintf(d.file, "%+v\n", h) + + return err +} diff --git a/tests/cmd/check-markdown/display_tsv.go b/tests/cmd/check-markdown/display_tsv.go new file mode 100644 index 0000000000..f71ea91f3d --- /dev/null +++ b/tests/cmd/check-markdown/display_tsv.go @@ -0,0 
+1,72 @@ +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "encoding/csv" + "os" +) + +type displayTSV struct { + writer *csv.Writer + disableHeader bool +} + +func NewDisplayTSV(file *os.File, separator rune, disableHeader bool) DisplayHandler { + tsv := &displayTSV{ + disableHeader: disableHeader, + } + + tsv.writer = csv.NewWriter(file) + + tsv.writer.Comma = separator + + return tsv +} + +func (d *displayTSV) DisplayLinks(doc *Doc) error { + if !d.disableHeader { + record := linkHeaderRecord() + if err := d.writer.Write(record); err != nil { + return err + } + } + + for _, linkList := range doc.Links { + for _, link := range linkList { + record := linkToRecord(link) + + if err := d.writer.Write(record); err != nil { + return err + } + } + } + + d.writer.Flush() + + return d.writer.Error() +} + +func (d *displayTSV) DisplayHeadings(doc *Doc) error { + if !d.disableHeader { + record := headingHeaderRecord() + if err := d.writer.Write(record); err != nil { + return err + } + } + + for _, l := range doc.Headings { + record := headingToRecord(l) + + if err := d.writer.Write(record); err != nil { + return err + } + } + + d.writer.Flush() + + return d.writer.Error() +} diff --git a/tests/cmd/check-markdown/doc.go b/tests/cmd/check-markdown/doc.go new file mode 100644 index 0000000000..a4d0efead3 --- /dev/null +++ b/tests/cmd/check-markdown/doc.go @@ -0,0 +1,76 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "errors" + "fmt" + + "github.com/sirupsen/logrus" +) + +// Details of the main document, and all other documents it references. +// Key: document name. +var docs map[string]*Doc + +func init() { + docs = make(map[string]*Doc) +} + +// newDoc creates a new document. 
+func newDoc(name string, logger *logrus.Entry) *Doc { + d := &Doc{ + Name: name, + Headings: make(map[string]Heading), + Links: make(map[string][]Link), + Parsed: false, + ShowTOC: false, + Logger: logger, + } + + d.Logger = logger.WithField("file", d.Name) + + // add to the hash + docs[name] = d + + return d +} + +// getDoc returns the Doc structure represented by the specified name, +// creating it and adding to the docs map if necessary. +func getDoc(name string, logger *logrus.Entry) (*Doc, error) { + if name == "" { + return &Doc{}, errors.New("need doc name") + } + + doc, ok := docs[name] + if ok { + return doc, nil + } + + return newDoc(name, logger), nil +} + +// hasHeading returns true if the specified heading exists for the document. +func (d *Doc) hasHeading(name string) bool { + return d.heading(name) != nil +} + +// Errorf is a convenience function to generate an error for this particular +// document. +func (d *Doc) Errorf(format string, args ...interface{}) error { + s := fmt.Sprintf(format, args...) + + return fmt.Errorf("file=%q: %s", d.Name, s) +} + +// String "pretty-prints" the specified document +// +// Just display the name as that is enough in text output. +func (d *Doc) String() string { + return d.Name +} diff --git a/tests/cmd/check-markdown/extract.go b/tests/cmd/check-markdown/extract.go new file mode 100644 index 0000000000..247bbdfbd3 --- /dev/null +++ b/tests/cmd/check-markdown/extract.go @@ -0,0 +1,93 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "fmt" + + bf "gopkg.in/russross/blackfriday.v2" +) + +// linkDescription extracts the description from the specified link node. +func linkDescription(l *bf.Node) (string, error) { + if err := checkNode(l, bf.Link); err != nil { + return "", err + } + + // A link description can be comprised of various elements so scan + // through them to build up the final value. 
+ + text := "" + node := l.FirstChild + + for node != nil { + switch node.Type { + case bf.Code: + text += string(node.Literal) + case bf.Text: + text += string(node.Literal) + default: + logger.WithField("node", node).Debug("ignoring node") + } + + if node == l.LastChild { + break + } + + node = node.Next + } + + return text, nil +} + +// headingName extracts the heading name from the specified Heading node in +// plain text, and markdown. The latter is used for creating TOC's which need +// to include the original markdown value. +func headingName(h *bf.Node) (name, mdName string, err error) { + if err = checkNode(h, bf.Heading); err != nil { + return "", "", err + } + + // A heading can be comprised of various elements so scan + // through them to build up the final value. + + node := h.FirstChild + + for node != nil { + switch node.Type { + case bf.Code: + value := string(node.Literal) + + name += value + mdName += fmt.Sprintf("`%s`", value) + case bf.Text: + value := string(node.Literal) + + name += value + mdName += value + case bf.Link: + // yep, people do crazy things like adding links into titles! + descr, err := linkDescription(node) + if err != nil { + return "", "", err + } + + name += descr + mdName += descr + default: + logger.WithField("node", node).Debug("ignoring node") + } + + if node == h.LastChild { + break + } + + node = node.Next + } + + return name, mdName, nil +} diff --git a/tests/cmd/check-markdown/hack.go b/tests/cmd/check-markdown/hack.go new file mode 100644 index 0000000000..ee65cc3a9a --- /dev/null +++ b/tests/cmd/check-markdown/hack.go @@ -0,0 +1,69 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "strings" + + bf "gopkg.in/russross/blackfriday.v2" +) + +// forceCreateHeadings extracts "missed" headings from the specified node, +// returning a slice of the newly headings created (which need to be added by the +// caller). 
+// +// Alas, Black Friday isn't 100% reliable... +func (d *Doc) forceCreateHeadings(node *bf.Node) ([]Heading, error) { + if err := checkNode(node, bf.Text); err != nil { + return []Heading{}, err + } + + chunk := string(node.Literal) + + if chunk == "" { + // No text in this node + return []Heading{}, nil + } + + lines := strings.Split(chunk, "\n") + if len(lines) <= 1 { + // No headings lurking in this text node + return []Heading{}, nil + } + + var headings []Heading + + for _, line := range lines { + if !strings.HasPrefix(line, anchorPrefix) { + continue + } + + fields := strings.Split(line, anchorPrefix) + name := strings.Join(fields, "") + name = strings.TrimSpace(name) + + count := strings.Count(line, anchorPrefix) + + heading := Heading{ + Name: name, + Level: count, + } + + id, err := createHeadingID(heading.Name) + if err != nil { + return []Heading{}, err + } + + heading.LinkName = id + + headings = append(headings, heading) + + extraHeadings++ + } + + return headings, nil +} diff --git a/tests/cmd/check-markdown/heading.go b/tests/cmd/check-markdown/heading.go new file mode 100644 index 0000000000..31eec470e6 --- /dev/null +++ b/tests/cmd/check-markdown/heading.go @@ -0,0 +1,36 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import "fmt" + +// newHeading creates a new Heading. 
+func newHeading(name, mdName string, level int) (Heading, error) { + if name == "" { + return Heading{}, fmt.Errorf("heading name cannot be blank") + } + + if mdName == "" { + return Heading{}, fmt.Errorf("heading markdown name cannot be blank") + } + + linkName, err := createHeadingID(name) + if err != nil { + return Heading{}, err + } + + if level < 1 { + return Heading{}, fmt.Errorf("level needs to be atleast 1") + } + + return Heading{ + Name: name, + MDName: mdName, + LinkName: linkName, + Level: level, + }, nil +} diff --git a/tests/cmd/check-markdown/heading_test.go b/tests/cmd/check-markdown/heading_test.go new file mode 100644 index 0000000000..2d5d1a1296 --- /dev/null +++ b/tests/cmd/check-markdown/heading_test.go @@ -0,0 +1,65 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewHeading(t *testing.T) { + assert := assert.New(t) + + type testData struct { + headingName string + mdName string + expectedLinkName string + level int + expectError bool + } + + data := []testData{ + {"", "", "", -1, true}, + {"a", "", "", -1, true}, + {"a", "a", "", -1, true}, + {"a", "a", "", 0, true}, + {"a", "", "", 1, true}, + + {"a", "a", "a", 1, false}, + {"a-b", "`a-b`", "`a-b`", 1, false}, + {"a_b", "`a_b`", "`a_b`", 1, false}, + {"foo (json) bar", "foo `(json)` bar", "foo-json-bar", 1, false}, + {"func(json)", "`func(json)`", "funcjson", 1, false}, + {"?", "?", "", 1, false}, + {"a b", "a b", "a-b", 1, false}, + {"a - b", "a - b", "a---b", 1, false}, + {"a - b?", "a - b?", "a---b", 1, false}, + {"a - b.", "a - b.", "a---b", 1, false}, + {"a:b", "a:b", "ab", 1, false}, + {"a;b", "a;b", "ab", 1, false}, + {"a@b", "a@b", "ab", 1, false}, + {"a+b", "a+b", "ab", 1, false}, + {"a,b", "a,b", "ab", 1, false}, + } + + for i, d := range data { + msg := fmt.Sprintf("test[%d]: %+v\n", i, d) + + h, err := newHeading(d.headingName, 
d.mdName, d.level) + if d.expectError { + assert.Error(err, msg) + continue + } + + assert.Equal(h.Name, d.headingName, msg) + assert.Equal(h.MDName, d.mdName, msg) + assert.Equal(h.Level, d.level, msg) + assert.Equal(h.LinkName, d.expectedLinkName, msg) + } +} diff --git a/tests/cmd/check-markdown/link.go b/tests/cmd/check-markdown/link.go new file mode 100644 index 0000000000..fe6e5cca4c --- /dev/null +++ b/tests/cmd/check-markdown/link.go @@ -0,0 +1,122 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "errors" + "os" + "path/filepath" + "regexp" + "strings" +) + +// newLink creates a new Link. +func newLink(doc *Doc, address, description string) (Link, error) { + l := Link{ + Doc: doc, + Address: address, + Description: description, + } + + err := l.categorise() + if err != nil { + return Link{}, err + } + + return l, nil +} + +// categorise determines the type of Link. +func (l *Link) categorise() error { + address := l.Address + + // markdown file extension with optional link name ("#...") + const re = `\.md#*.*$` + + pattern := regexp.MustCompile(re) + + matched := pattern.MatchString(address) + + if strings.HasPrefix(address, "http:") { + l.Type = urlLink + } else if strings.HasPrefix(address, "https:") { + l.Type = urlLink + } else if strings.HasPrefix(address, "mailto:") { + l.Type = mailLink + } else if strings.HasPrefix(address, anchorPrefix) { + l.Type = internalLink + + // Remove the prefix to make a valid link address + address = strings.TrimPrefix(address, anchorPrefix) + l.Address = address + } else if matched { + l.Type = externalLink + + file, _, err := splitLink(address) + if err != nil { + return err + } + + file, err = l.Doc.linkAddrToPath(file) + if err != nil { + return err + } + + l.ResolvedPath = file + } else { + isREADME, err := l.handleImplicitREADME() + if err != nil { + return err + } + + if !isREADME { + // Link must be an external file, but not a markdown 
file. + l.Type = externalFile + } + } + + return nil +} + +// handleImplicitREADME determines if the specified link is an implicit link +// to a README document. +func (l *Link) handleImplicitREADME() (isREADME bool, err error) { + const readme = "README.md" + + address := l.Address + if address == "" { + return false, errors.New("need link address") + } + + file, err := l.Doc.linkAddrToPath(address) + if err != nil { + return false, err + } + + // The resolved path should exist as this is a local file. + st, err := os.Stat(file) + if err != nil { + return false, err + } + + if !st.IsDir() { + return false, nil + } + + // The file is a directory so try appending the implicit README file + // and see if that exists. + resolvedPath := filepath.Join(file, readme) + + success := fileExists(resolvedPath) + + if success { + l.Type = externalLink + l.ResolvedPath = resolvedPath + } + + return success, nil +} diff --git a/tests/cmd/check-markdown/link_test.go b/tests/cmd/check-markdown/link_test.go new file mode 100644 index 0000000000..c3651a2a0b --- /dev/null +++ b/tests/cmd/check-markdown/link_test.go @@ -0,0 +1,209 @@ +// +// Copyright (c) 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package main + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +// createLinkAndCategorise will create a link and categorise it. If +// createLinkManually is set, the link will be created "manually" (without the +// constructor) and categorise() called. If not set, the constructor will be +// used. 
+// createLinkAndCategorise is the table-driven body shared by TestNewLink and
+// TestLinkCategorise.
+//
+// When createLinkManually is true each Link is built as a struct literal and
+// classified by calling categorise() directly; otherwise newLink() is used.
+// In both cases the returned error, the link type and (when checkPath is set)
+// the resolved path are compared with the expectations in the table.
+func createLinkAndCategorise(assert *assert.Assertions, createLinkManually bool) {
+	dir, err := os.MkdirTemp("", "")
+	assert.NoError(err)
+
+	cwd, err := os.Getwd()
+	assert.NoError(err)
+	defer os.Chdir(cwd)
+
+	err = os.Chdir(dir)
+	assert.NoError(err)
+	defer os.RemoveAll(dir)
+
+	readmeDirName := "dir-with-readme"
+	emptyDirName := "empty"
+	makeDirs(assert, dir, readmeDirName, emptyDirName)
+
+	readmeDirPath := filepath.Join(readmeDirName, readmeName)
+
+	topLevelReadmeName := "top-level.md"
+	topLevelReadmeLink := filepath.Join("/", topLevelReadmeName)
+
+	topLevelReadmePath := filepath.Join(dir, topLevelReadmeName)
+
+	type testData struct {
+		linkAddress string
+
+		expectedPath string
+
+		expectedType LinkType
+		expectError  bool
+
+		// Set if expectedPath should be checked
+		checkPath bool
+	}
+
+	// docRoot is package-level state: point it at the temporary directory for
+	// the duration of this test only, so later tests see the previous value.
+	savedDocRoot := docRoot
+	defer func() { docRoot = savedDocRoot }()
+
+	docRoot = dir
+
+	data := []testData{
+		{"", "", -1, true, false},
+		{"a", "", -1, true, false},
+		{"a.b", "", -1, true, false},
+		{"a#b", "", -1, true, false},
+
+		{"htt://foo", "", -1, true, false},
+		{"HTTP://foo", "", -1, true, false},
+		{"moohttp://foo", "", -1, true, false},
+		{"mailto", "", -1, true, false},
+		{"http", "", -1, true, false},
+		{"https", "", -1, true, false},
+
+		{"http://foo", "", urlLink, false, false},
+		{"https://foo/", "", urlLink, false, false},
+		{"https://foo/bar", "", urlLink, false, false},
+		{"mailto:me", "", mailLink, false, false},
+
+		{".", "", externalFile, false, false},
+		{"/", "", externalFile, false, false},
+		{emptyDirName, "", externalFile, false, false},
+
+		{readmeDirName, readmeDirPath, externalLink, false, true},
+		{"foo.md", "foo.md", externalLink, false, true},
+		{"foo.md#bar", "foo.md", externalLink, false, true},
+		{topLevelReadmeLink, topLevelReadmePath, externalLink, false, true},
+	}
+
+	logger := logrus.WithField("test", "true")
+	description := ""
+
+	for i, d := range data {
+		var link Link
+		var err error
+
+		doc := newDoc("foo", logger)
+
+		if createLinkManually {
+			link = Link{
+				Doc:         doc,
+				Address:     d.linkAddress,
+				Description: description,
+			}
+
+			err = link.categorise()
+		} else {
+			link, err = newLink(doc, d.linkAddress, description)
+		}
+
+		msg := fmt.Sprintf("test[%d] manual-link: %v: %+v, link: %+v\n", i, createLinkManually, d, link)
+
+		if d.expectError {
+			assert.Error(err, msg)
+			continue
+		}
+
+		assert.NoError(err, msg)
+
+		// testify expects (expected, actual); msg identifies the failing row.
+		assert.Equal(doc, link.Doc, msg)
+		assert.Equal(d.linkAddress, link.Address, msg)
+		assert.Equal(description, link.Description, msg)
+		assert.Equal(d.expectedType, link.Type, msg)
+
+		if d.checkPath {
+			assert.Equal(d.expectedPath, link.ResolvedPath, msg)
+		}
+	}
+}
+
+func TestNewLink(t *testing.T) {
+	assert := assert.New(t)
+
+	createLinkAndCategorise(assert, false)
+}
+
+func TestLinkCategorise(t *testing.T) {
+	assert := assert.New(t)
+
+	createLinkAndCategorise(assert, true)
+}
+
+// TestLinkHandleImplicitREADME checks Link.handleImplicitREADME() against an
+// empty address, a missing path, an empty file, an empty directory and a
+// directory created via makeDirs under the name "dir-with-readme".
+func TestLinkHandleImplicitREADME(t *testing.T) {
+	assert := assert.New(t)
+
+	dir, err := os.MkdirTemp("", "")
+	assert.NoError(err)
+	// Registered first so that it runs last, after the working directory
+	// has been restored below.
+	defer os.RemoveAll(dir)
+
+	cwd, err := os.Getwd()
+	assert.NoError(err)
+	defer os.Chdir(cwd)
+
+	err = os.Chdir(dir)
+	assert.NoError(err)
+
+	readmeDirName := "dir-with-readme"
+	emptyDirName := "empty"
+	makeDirs(assert, dir, readmeDirName, emptyDirName)
+
+	readmePath := filepath.Join(readmeDirName, readmeName)
+
+	emptyFileName := "empty-file"
+
+	err = createFile(emptyFileName, "")
+	assert.NoError(err)
+
+	type testData struct {
+		linkAddr     string
+		expectedPath string
+		expectedType LinkType
+		isREADME     bool
+		expectError  bool
+	}
+
+	data := []testData{
+		{"", "", unknownLink, false, true},
+		{"foo", "", unknownLink, false, true},
+		{emptyFileName, "", unknownLink, false, false},
+		{emptyDirName, "", unknownLink, false, false},
+		{readmeDirName, readmePath, externalLink, true, false},
+	}
+
+	logger := logrus.WithField("test", "true")
+
+	for i, d := range data {
+		doc := newDoc("foo", logger)
+
+		link := Link{
+			Doc:     doc,
+			Address: d.linkAddr,
+		}
+
+		msg := fmt.Sprintf("test[%d]: %+v\n", i, d)
+
+		isREADME, err := link.handleImplicitREADME()
+
+		if d.expectError {
+			assert.Error(err, msg)
+			continue
+		}
+
+		assert.NoError(err, msg)
+		assert.Equal(d.isREADME, isREADME, msg)
+		assert.Equal(d.linkAddr, link.Address, msg)
+		assert.Equal(d.expectedType, link.Type, msg)
+		assert.Equal(d.expectedPath, link.ResolvedPath, msg)
+	}
+}
diff --git a/tests/cmd/check-markdown/main.go b/tests/cmd/check-markdown/main.go
new file mode 100644
index 0000000000..e626a95d9d
--- /dev/null
+++ b/tests/cmd/check-markdown/main.go
@@ -0,0 +1,348 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/sirupsen/logrus"
+	"github.com/urfave/cli"
+)
+
+// DataToShow selects which kind of document element a "list" command
+// displays (see showLinks and showHeadings).
+type DataToShow int
+
+const (
+	// Character used (after an optional filename) before a heading ID.
+	anchorPrefix = "#"
+
+	// Character used to signify an "absolute link path" which should
+	// expand to the value of the document root.
+	absoluteLinkPrefix = "/"
+
+	// iota counts every entry of this const block, so these two values are
+	// 2 and 3 rather than 0 and 1. They only need to be distinct.
+	showLinks    DataToShow = iota
+	showHeadings DataToShow = iota
+
+	textFormat          = "text"
+	tsvFormat           = "tsv"
+	defaultOutputFormat = textFormat
+	defaultSeparator    = "\t"
+)
+
+var (
+	// set by the build
+	name    = ""
+	version = ""
+	commit  = ""
+
+	strict = false
+
+	// list entry character to use when generating TOCs
+	listPrefix = "*"
+
+	logger *logrus.Entry
+
+	errNeedFile = errors.New("need markdown file")
+)
+
+// Black Friday sometimes chokes on markdown (I know!!), so record how many
+// extra headings we found.
+var extraHeadings int
+
+// Root directory used to handle "absolute link paths" that start with a slash
+// to denote the "top directory", like this:
+//
+//	[Foo](/absolute-link.md)
+var docRoot string
+
+// notes is appended to the application help template by realMain.
+var notes = fmt.Sprintf(`
+
+NOTES:
+
+- The document root is used to handle markdown references that begin with %q,
+  denoting that the path that follows is an "absolute path" from the specified
+  document root path.
+
+- The order the document nodes are parsed internally is not known to
+  this program. This means that if multiple errors exist in the document,
+  running this tool multiple times will error on *one* of the errors, but not
+  necessarily the same one as last time.
+
+LIMITATIONS:
+
+- The default document root only works if this tool is run from the top-level
+  of a repository.
+
+`, absoluteLinkPrefix)
+
+// Flags shared by the "list headings" and "list links" sub-commands.
+var formatFlag = cli.StringFlag{
+	Name:  "format",
+	Usage: "display in specified format ('help' to show all)",
+	Value: defaultOutputFormat,
+}
+
+var separatorFlag = cli.StringFlag{
+	Name:  "separator",
+	Usage: fmt.Sprintf("use the specified separator character (%s format only)", tsvFormat),
+	Value: defaultSeparator,
+}
+
+var noHeaderFlag = cli.BoolFlag{
+	Name:  "no-header",
+	Usage: "disable display of header (if format supports one)",
+}
+
+// init creates the package logger, tagging every entry with the build
+// details and the process ID.
+func init() {
+	logger = logrus.WithFields(logrus.Fields{
+		"name":    name,
+		"source":  "check-markdown",
+		"version": version,
+		"commit":  commit,
+		"pid":     os.Getpid(),
+	})
+
+	logger.Logger.Formatter = &logrus.TextFormatter{
+		TimestampFormat: time.RFC3339Nano,
+		//DisableColors: true,
+	}
+
+	// Write to stdout to avoid upsetting CI systems that consider stderr
+	// writes as indicating an error.
+	logger.Logger.Out = os.Stdout
+}
+
+// handleLogging sets the logger level: debug if the global "debug" flag was
+// specified, otherwise info.
+func handleLogging(c *cli.Context) {
+	logLevel := logrus.InfoLevel
+
+	if c.GlobalBool("debug") {
+		logLevel = logrus.DebugLevel
+	}
+
+	logger.Logger.SetLevel(logLevel)
+}
+
+// handleDoc implements the "check" (createTOC false) and "toc" (createTOC
+// true) commands for the file named by the first command-line argument.
+//
+// The named document is parsed first. If single-document mode is active
+// (requested with the "single-doc-only" flag, or forced when creating a TOC)
+// and more than one document is registered in docs, the function returns at
+// that point. Otherwise every entry in docs that is not yet marked as parsed
+// is parsed, repeating until a complete pass finds nothing left to do, and
+// the links between documents are then checked.
+//
+// It returns errNeedFile if no file name was given, or the first parse or
+// link error encountered.
+func handleDoc(c *cli.Context, createTOC bool) error {
+	handleLogging(c)
+
+	if c.NArg() == 0 {
+		return errNeedFile
+	}
+
+	fileName := c.Args().First()
+	if fileName == "" {
+		return errNeedFile
+	}
+
+	singleDocOnly := c.GlobalBool("single-doc-only")
+
+	doc := newDoc(fileName, logger)
+	doc.ShowTOC = createTOC
+
+	if createTOC {
+		// Only makes sense to generate a single TOC!
+		singleDocOnly = true
+	}
+
+	// Parse the main document first
+	err := doc.parse()
+	if err != nil {
+		return err
+	}
+
+	if singleDocOnly && len(docs) > 1 {
+		doc.Logger.Debug("Not checking referenced files at user request")
+		return nil
+	}
+
+	// Now handle all other docs that the main doc references.
+	// This requires care to avoid recursion.
+	for {
+		count := len(docs)
+		parsed := 0
+		for _, doc := range docs {
+			if doc.Parsed {
+				// Document has already been handled
+				parsed++
+				continue
+			}
+
+			if err := doc.parse(); err != nil {
+				return err
+			}
+		}
+
+		// Stop once a whole pass found every document already parsed.
+		if parsed == count {
+			break
+		}
+	}
+
+	err = handleIntraDocLinks()
+	if err != nil {
+		return err
+	}
+
+	if !createTOC {
+		doc.Logger.Info("Checked file")
+		doc.showStats()
+	}
+
+	count := len(docs)
+
+	if count > 1 {
+		// Update to ignore main document
+		count--
+
+		doc.Logger.WithField("reference-document-count", count).Info("Checked referenced files")
+
+		for _, d := range docs {
+			if d.Name == doc.Name {
+				// Ignore main document
+				continue
+			}
+
+			fmt.Printf("\t%q\n", d.Name)
+		}
+	}
+
+	// Highlight blackfriday deficiencies
+	if !doc.ShowTOC && extraHeadings > 0 {
+		doc.Logger.WithField("extra-heading-count", extraHeadings).Debug("Found extra headings")
+	}
+
+	return nil
+}
+
+// commonListHandler is used to handle all list operations.
+func commonListHandler(context *cli.Context, what DataToShow) error {
+	handleLogging(context)
+
+	handlers := NewDisplayHandlers(context.String("separator"), context.Bool("no-header"))
+
+	// The pseudo-format "help" lists the available formats and exits
+	// without needing a file argument.
+	format := context.String("format")
+	if format == "help" {
+		availableFormats := handlers.Get()
+
+		for _, format := range availableFormats {
+			fmt.Fprintf(outputFile, "%s\n", format)
+		}
+
+		return nil
+	}
+
+	handler := handlers.find(format)
+	if handler == nil {
+		return fmt.Errorf("no handler for format %q", format)
+	}
+
+	if context.NArg() == 0 {
+		return errNeedFile
+	}
+
+	file := context.Args().Get(0)
+
+	return show(file, logger, handler, what)
+}
+
+// realMain sets the default document root to the current directory, builds
+// the command-line application (global flags plus the "check", "toc" and
+// "list" commands) and runs it with the process arguments.
+func realMain() error {
+	cwd, err := os.Getwd()
+	if err != nil {
+		return err
+	}
+
+	docRoot = cwd
+
+	cli.VersionPrinter = func(c *cli.Context) {
+		fmt.Fprintln(os.Stdout, c.App.Version)
+	}
+
+	// Append the NOTES/LIMITATIONS text to the standard help output.
+	cli.AppHelpTemplate = fmt.Sprintf(`%s%s`, cli.AppHelpTemplate, notes)
+
+	app := cli.NewApp()
+	app.Name = name
+	app.Version = fmt.Sprintf("%s %s (commit %v)", name, version, commit)
+	app.Description = "Tool to check GitHub-Flavoured Markdown (GFM) format documents"
+	app.Usage = app.Description
+	app.UsageText = fmt.Sprintf("%s [options] file ...", app.Name)
+	// NOTE(review): nothing in the code visible here reads the "doc-root" or
+	// "strict" flag values back from the context - confirm they are consumed
+	// elsewhere.
+	app.Flags = []cli.Flag{
+		cli.BoolFlag{
+			Name:  "debug, d",
+			Usage: "display debug information",
+		},
+		cli.StringFlag{
+			Name:  "doc-root, r",
+			Usage: "specify document root",
+			Value: docRoot,
+		},
+		cli.BoolFlag{
+			Name:  "single-doc-only, o",
+			Usage: "only check primary (specified) document",
+		},
+		cli.BoolFlag{
+			Name:  "strict, s",
+			Usage: "enable strict mode",
+		},
+	}
+
+	app.Commands = []cli.Command{
+		{
+			Name:        "check",
+			Usage:       "perform tests on the specified document",
+			Description: "Exit code denotes success",
+			Action: func(c *cli.Context) error {
+				return handleDoc(c, false)
+			},
+		},
+		{
+			Name:  "toc",
+			Usage: "display a markdown Table of Contents",
+			Action: func(c *cli.Context) error {
+				return handleDoc(c, true)
+			},
+		},
+		{
+			Name:  "list",
+			Usage: "display particular parts of the document",
+			Subcommands: []cli.Command{
+				{
+					Name:  "headings",
+					Usage: "display headings",
+					Flags: []cli.Flag{
+						formatFlag,
+						noHeaderFlag,
+						separatorFlag,
+					},
+					Action: func(c *cli.Context) error {
+						return commonListHandler(c, showHeadings)
+					},
+				},
+				{
+					Name:  "links",
+					Usage: "display links",
+					Flags: []cli.Flag{
+						formatFlag,
+						noHeaderFlag,
+						separatorFlag,
+					},
+					Action: func(c *cli.Context) error {
+						return commonListHandler(c, showLinks)
+					},
+				},
+			},
+		},
+	}
+
+	return app.Run(os.Args)
+}
+
+// main runs realMain and reports any error it returns through the logger at
+// fatal level.
+func main() {
+	err := realMain()
+	if err != nil {
+		logger.Fatalf("%v", err)
+	}
+}
diff --git a/tests/cmd/check-markdown/node.go b/tests/cmd/check-markdown/node.go
new file mode 100644
index 0000000000..5d8d104a36
--- /dev/null
+++ b/tests/cmd/check-markdown/node.go
@@ -0,0 +1,115 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	bf "gopkg.in/russross/blackfriday.v2"
+)
+
+// handleNode processes the specified node.
+//
+// Heading and link nodes are passed to handleHeading and handleLink. Text
+// nodes are passed to forceCreateHeadings and every heading it returns is
+// added to the document. All other node types are ignored.
+func (d *Doc) handleNode(node *bf.Node) error {
+	var err error
+
+	switch node.Type {
+	case bf.Heading:
+		err = d.handleHeading(node)
+	case bf.Link:
+		err = d.handleLink(node)
+	case bf.Text:
+		// handle blackfriday deficiencies
+		//
+		// Note that ":=" declares a new err that shadows the outer one, so
+		// failures in this case are returned directly from inside it.
+		headings, err := d.forceCreateHeadings(node)
+		if err != nil {
+			return err
+		}
+
+		for _, heading := range headings {
+			err := d.addHeading(heading)
+			if err != nil {
+				return err
+			}
+		}
+
+	default:
+		return nil
+	}
+
+	// Only the heading and link cases assign the outer err.
+	return err
+}
+
+// makeHeading creates a heading from the specified node.
+func (d *Doc) makeHeading(node *bf.Node) (Heading, error) {
+	// Reject nil nodes and nodes that are not headings.
+	if err := checkNode(node, bf.Heading); err != nil {
+		return Heading{}, err
+	}
+
+	name, mdName, err := headingName(node)
+	if err != nil {
+		return Heading{}, d.Errorf("failed to get heading name: %v", err)
+	}
+
+	data := node.HeadingData
+
+	heading, err := newHeading(name, mdName, data.Level)
+	if err != nil {
+		return Heading{}, err
+	}
+
+	return heading, nil
+}
+
+// handleHeading processes the heading represented by the specified node.
+//
+// The heading is created with makeHeading and then added to the document.
+func (d *Doc) handleHeading(node *bf.Node) error {
+	// makeHeading repeats this check; doing it here as well means a bad
+	// node is rejected before any other work is attempted.
+	if err := checkNode(node, bf.Heading); err != nil {
+		return err
+	}
+
+	heading, err := d.makeHeading(node)
+	if err != nil {
+		return err
+	}
+
+	return d.addHeading(heading)
+}
+
+// handleLink processes the link represented by the specified node.
+//
+// A Link is created from the node's destination and description and then
+// added to the document. An error is returned if the node is not a link, if
+// its description cannot be determined, or if the link cannot be created or
+// added.
+func (d *Doc) handleLink(node *bf.Node) error {
+	if err := checkNode(node, bf.Link); err != nil {
+		return err
+	}
+
+	address := string(node.Destination)
+
+	description, err := linkDescription(node)
+	if err != nil {
+		return d.Errorf("failed to get link name: %v", err)
+	}
+
+	link, err := newLink(d, address, description)
+	if err != nil {
+		return err
+	}
+
+	return d.addLink(link)
+}
+
+// handleIntraDocLinks checks the links between documents are correct.
+//
+// For example, if a document refers to "foo.md#section-bar", this function
+// will ensure that "section-bar" exists in external file "foo.md".
+func handleIntraDocLinks() error {
+	// Check every link of every known document, stopping at the first
+	// invalid one.
+	for _, doc := range docs {
+		for addr, linkList := range doc.Links {
+			for _, link := range linkList {
+				err := doc.checkLink(addr, link, true)
+				if err != nil {
+					return doc.Errorf("intra-doc link invalid: %v", err)
+				}
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/tests/cmd/check-markdown/parse.go b/tests/cmd/check-markdown/parse.go
new file mode 100644
index 0000000000..930a3e8926
--- /dev/null
+++ b/tests/cmd/check-markdown/parse.go
@@ -0,0 +1,100 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	bf "gopkg.in/russross/blackfriday.v2"
+)
+
+// List of errors found by visitor. Used as the visitor cannot return an error
+// directly.
+//
+// NOTE(review): the code visible here only ever appends to this list; it is
+// never reset between documents - confirm that is intended.
+var errorList []error
+
+// parse parses the document's markdown and, on success, marks the document
+// as parsed. The "Checking file" message is only logged when the document
+// is neither in TOC mode nor in list mode.
+func (d *Doc) parse() error {
+	if !d.ShowTOC && !d.ListMode {
+		d.Logger.Info("Checking file")
+	}
+
+	err := d.parseMarkdown()
+	if err != nil {
+		return err
+	}
+
+	// mark document as having been handled
+	d.Parsed = true
+
+	return nil
+}
+
+// parseMarkdown parses the document's markdown.
+//
+// The file named by d.Name is read and parsed, and every node of the result
+// is visited. If the visitor recorded any errors they are returned as a
+// single error listing all of them, one per line; otherwise the result of
+// d.check() is returned.
+func (d *Doc) parseMarkdown() error {
+	bytes, err := os.ReadFile(d.Name)
+	if err != nil {
+		return err
+	}
+
+	md := bf.New(bf.WithExtensions(bf.CommonExtensions))
+
+	root := md.Parse(bytes)
+
+	root.Walk(makeVisitor(d, d.ShowTOC))
+
+	errorCount := len(errorList)
+	if errorCount > 0 {
+		// Pluralise "error" unless there is exactly one.
+		extra := ""
+		if errorCount != 1 {
+			extra = "s"
+		}
+
+		var msg []string
+
+		for _, err := range errorList {
+			msg = append(msg, err.Error())
+		}
+
+		return fmt.Errorf("found %d parse error%s:\n%s",
+			errorCount,
+			extra,
+			strings.Join(msg, "\n"))
+	}
+
+	return d.check()
+}
+
+// makeVisitor returns a function that is used to visit all document nodes.
+//
+// If createTOC is false, the visitor will check all nodes, but if true, the
+// visitor will only display a table of contents for the document.
+func makeVisitor(doc *Doc, createTOC bool) func(node *bf.Node, entering bool) bf.WalkStatus {
+	f := func(node *bf.Node, entering bool) bf.WalkStatus {
+		// Each node is handled once, on entry; the exit visit is a no-op.
+		if !entering {
+			return bf.GoToNext
+		}
+
+		var err error
+
+		if createTOC {
+			err = doc.displayTOC(node)
+		} else {
+			err = doc.handleNode(node)
+		}
+
+		if err != nil {
+			// The visitor cannot return an error, so collect up all parser
+			// errors for dealing with later.
+			errorList = append(errorList, err)
+		}
+
+		// Always continue the walk, even after an error.
+		return bf.GoToNext
+	}
+
+	return f
+}
diff --git a/tests/cmd/check-markdown/record.go b/tests/cmd/check-markdown/record.go
new file mode 100644
index 0000000000..72be75a7b1
--- /dev/null
+++ b/tests/cmd/check-markdown/record.go
@@ -0,0 +1,43 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import "fmt"
+
+// linkHeaderRecord returns the column titles for a link record, in the same
+// order as the fields returned by linkToRecord.
+func linkHeaderRecord() []string {
+	return []string{
+		"Document",
+		"Address",
+		"Path",
+		"Description",
+		"Type",
+	}
+}
+
+// linkToRecord converts the specified link into a list of strings: the name
+// of its document, its address, resolved path, description and type name.
+func linkToRecord(l Link) (record []string) {
+	record = append(record, l.Doc.Name)
+	record = append(record, l.Address)
+	record = append(record, l.ResolvedPath)
+	record = append(record, l.Description)
+	record = append(record, l.Type.String())
+
+	return record
+}
+
+// headingHeaderRecord returns the column titles for a heading record, in the
+// same order as the fields returned by headingToRecord.
+func headingHeaderRecord() []string {
+	return []string{
+		"Name",
+		"Link",
+		"Level",
+	}
+}
+
+// headingToRecord converts the specified heading into a list of strings: its
+// name, its link name and its level formatted as a decimal number.
+func headingToRecord(h Heading) (record []string) {
+	record = append(record, h.Name)
+	record = append(record, h.LinkName)
+	record = append(record, fmt.Sprintf("%d", h.Level))
+
+	return record
+}
diff --git a/tests/cmd/check-markdown/search.go b/tests/cmd/check-markdown/search.go
new file mode 100644
index 0000000000..d310748a4b
--- /dev/null
+++ b/tests/cmd/check-markdown/search.go
@@ -0,0 +1,29 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+// headingByLinkName returns the heading associated with the specified link name.
+func (d *Doc) headingByLinkName(linkName string) *Heading {
+	for _, heading := range d.Headings {
+		if heading.LinkName == linkName {
+			// The loop variable is a copy of the map value, so the caller
+			// gets a pointer to a copy, not to the entry in d.Headings.
+			return &heading
+		}
+	}
+
+	return nil
+}
+
+// heading returns the heading whose LinkName matches the specified name, or
+// nil if there is no such heading.
+//
+// NOTE(review): this compares against Heading.LinkName rather than
+// Heading.Name, which makes it equivalent to headingByLinkName - confirm
+// that callers pass link names.
+func (d *Doc) heading(name string) *Heading {
+	for _, heading := range d.Headings {
+		if name == heading.LinkName {
+			return &heading
+		}
+	}
+
+	return nil
+}
diff --git a/tests/cmd/check-markdown/stats.go b/tests/cmd/check-markdown/stats.go
new file mode 100644
index 0000000000..a73a4a09bd
--- /dev/null
+++ b/tests/cmd/check-markdown/stats.go
@@ -0,0 +1,41 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"fmt"
+
+	"github.com/sirupsen/logrus"
+)
+
+// showStats logs a "Statistics" entry for the document containing the number
+// of headings, the total number of links and a count for each link type.
+func (d *Doc) showStats() {
+	// One counter per link type, indexed by the LinkType value. A link whose
+	// Type is outside the range [0, LinkTypeCount) would cause a panic here.
+	var counters [LinkTypeCount]int
+
+	linkCount := 0
+
+	for _, linkList := range d.Links {
+		for _, link := range linkList {
+			counters[link.Type]++
+			linkCount++
+		}
+	}
+
+	fields := logrus.Fields{
+		"headings-count": len(d.Headings),
+		"links-count":    linkCount,
+	}
+
+	// Add one "link-type-<name>-count" field per link type.
+	for i, count := range counters {
+		name := LinkType(i).String()
+
+		fieldName := fmt.Sprintf("link-type-%s-count", name)
+
+		fields[fieldName] = count
+	}
+
+	d.Logger.WithFields(fields).Info("Statistics")
+}
diff --git a/tests/cmd/check-markdown/toc.go b/tests/cmd/check-markdown/toc.go
new file mode 100644
index 0000000000..40eb5f8e39
--- /dev/null
+++ b/tests/cmd/check-markdown/toc.go
@@ -0,0 +1,75 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"fmt"
+	"strings"
+
+	bf "gopkg.in/russross/blackfriday.v2"
+)
+
+// displayTOC displays a table of contents entry for the specified node.
+func (d *Doc) displayTOC(node *bf.Node) error {
+	// Only heading nodes, and text nodes from which headings can be
+	// recovered, produce output; every other node type is ignored.
+	switch node.Type {
+	case bf.Heading:
+		return d.displayTOCEntryFromNode(node)
+	case bf.Text:
+		// handle blackfriday deficiencies
+		headings, err := d.forceCreateHeadings(node)
+		if err != nil {
+			return err
+		}
+
+		for _, heading := range headings {
+			err := d.displayTOCEntryFromHeading(heading)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// displayTOCEntryFromHeading displays a table of contents entry
+// for the specified heading.
+//
+// The entry is a markdown list item containing a link to the heading,
+// written to standard output and indented by four spaces for each level
+// below the top one. The returned error is always nil.
+func (d *Doc) displayTOCEntryFromHeading(heading Heading) error {
+	const indentSpaces = 4
+
+	prefix := ""
+
+	level := heading.Level
+
+	// Indent needs to be zero for top level headings
+	level--
+
+	if level > 0 {
+		prefix = strings.Repeat(" ", level*indentSpaces)
+	}
+
+	entry := fmt.Sprintf("[%s](%s%s)", heading.MDName, anchorPrefix, heading.LinkName)
+
+	fmt.Printf("%s%s %s\n", prefix, listPrefix, entry)
+
+	return nil
+}
+
+// displayTOCEntryFromNode displays a table of contents entry
+// for the heading represented by the specified node.
+func (d *Doc) displayTOCEntryFromNode(node *bf.Node) error {
+	if err := checkNode(node, bf.Heading); err != nil {
+		return err
+	}
+
+	heading, err := d.makeHeading(node)
+	if err != nil {
+		return err
+	}
+
+	return d.displayTOCEntryFromHeading(heading)
+}
diff --git a/tests/cmd/check-markdown/types.go b/tests/cmd/check-markdown/types.go
new file mode 100644
index 0000000000..61b9a503ca
--- /dev/null
+++ b/tests/cmd/check-markdown/types.go
@@ -0,0 +1,159 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import "github.com/sirupsen/logrus"
+
+// LinkType represents the type of a link in a markdown document.
+type LinkType int
+
+const (
+	unknownLink  LinkType = iota
+	internalLink LinkType = iota
+	externalLink LinkType = iota // External ".md" file
+	externalFile LinkType = iota // External non-".md" file
+	urlLink      LinkType = iota
+	mailLink     LinkType = iota
+
+	// LinkTypeCount is the number of link types defined above. It must
+	// remain the last entry as it is used to size per-type arrays.
+	LinkTypeCount LinkType = iota
+)
+
+// String returns the name of the link type, or an empty string for a value
+// that is not one of the link types defined above (including LinkTypeCount).
+func (t LinkType) String() string {
+	var name string
+
+	switch t {
+	case unknownLink:
+		name = "unknown"
+	case internalLink:
+		name = "internal-link"
+	case externalLink:
+		name = "external-link"
+	case externalFile:
+		name = "external-file"
+	case urlLink:
+		name = "url-link"
+	case mailLink:
+		name = "mail-link"
+	}
+
+	return name
+}
+
+// Heading is a markdown heading, which might be the destination
+// for a link.
+//
+// Example: A heading like this:
+//
+//	### This is a `verbatim` heading
+//
+// ... would be described as:
+//
+//	Heading{
+//		Name:     "This is a verbatim heading",
+//		MDName:   "This is a `verbatim` heading",
+//		LinkName: "this-is-a-verbatim-heading",
+//		Level:    3,
+//	}
+type Heading struct {
+	// Not strictly necessary since the name is used as a hash key.
+	// However, storing here too makes the code simpler ;)
+	Name string
+
+	// Name including any markdown syntax
+	MDName string
+
+	// The encoded value of Name.
+	LinkName string
+
+	// Heading level (1 for top level)
+	Level int
+}
+
+// Link is a reference to another part of this document
+// (or another document).
+//
+// Example: A link like this:
+//
+//	[internal link](#internal-section-name)
+//
+// ... would be described as:
+//
+//	Link{
+//		Address:      "internal-section-name",
+//		ResolvedPath: "",
+//		Description:  "internal link",
+//		Type:         internalLink,
+//	}
+//
+// NOTE(review): confirm whether Address really drops the leading "#" for
+// internal links; for the external links covered by the tests it is the
+// address exactly as written in the document.
+//
+// And a link like this:
+//
+//	[external link](/foo.md#section-name)
+//
+// ... would be described as:
+//
+//	Link{
+//		Address:      "/foo.md#section-name",
+//		ResolvedPath: "/docroot/foo.md",
+//		Description:  "external link",
+//		Type:         externalLink,
+//	}
+type Link struct {
+	// Document containing this link.
+	Doc *Doc
+
+	// Original address from document.
+	//
+	// Must be a valid Heading.LinkName.
+	//
+	// Not strictly necessary since the address is used as a hash key.
+	// However, storing here too makes the code simpler ;)
+	Address string
+
+	// The fully expanded address, without any anchor and heading suffix.
+	//
+	// Only applies to certain link types.
+	ResolvedPath string
+
+	// The text the user sees for the hyperlink address
+	Description string
+
+	// The kind of link this is.
+	Type LinkType
+}
+
+// Doc represents a markdown document.
+type Doc struct {
+	Logger *logrus.Entry
+
+	// Key: heading name
+	// Value: Heading
+	Headings map[string]Heading
+
+	// Key: link address
+	// Value: *list* of links. Required since you can have multiple links with
+	// the same _address_, but of a different type.
+	Links map[string][]Link
+
+	// Filename
+	Name string
+
+	// true when this document has been fully parsed
+	Parsed bool
+
+	// if true, only show the Table Of Contents
+	ShowTOC bool
+
+	// If true (or if ShowTOC is true), the "Checking file" message is not
+	// logged when the document is parsed.
+	ListMode bool
+}
diff --git a/tests/cmd/check-markdown/utils.go b/tests/cmd/check-markdown/utils.go
new file mode 100644
index 0000000000..bd6d415412
--- /dev/null
+++ b/tests/cmd/check-markdown/utils.go
@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"strings"
+	"unicode"
+
+	bf "gopkg.in/russross/blackfriday.v2"
+)
+
+// fileExists returns true if the specified file exists, else false.
+//
+// Only a "does not exist" error counts as absence: if os.Stat fails for any
+// other reason (for example a permissions error) true is still returned.
+func fileExists(path string) bool {
+	if _, err := os.Stat(path); os.IsNotExist(err) {
+		return false
+	}
+
+	return true
+}
+
+// splitLink splits a link like "foo.md#section-name" into a filename
+// ("foo.md") and a section name ("section-name").
+//
+// A link without an anchor is returned unchanged as the filename, with an
+// empty section name. An error is returned if the link is empty or if it
+// contains more than one anchor character.
+func splitLink(linkName string) (fileName, sectionName string, err error) {
+	if linkName == "" {
+		return "", "", errors.New("need linkName")
+	}
+
+	if !strings.Contains(linkName, anchorPrefix) {
+		return linkName, "", nil
+	}
+
+	fields := strings.Split(linkName, anchorPrefix)
+
+	// Exactly one anchor character is allowed: "file#section".
+	expectedFields := 2
+	foundFields := len(fields)
+	if foundFields != expectedFields {
+		return "", "", fmt.Errorf("invalid link %s: expected %d fields, found %d", linkName, expectedFields, foundFields)
+	}
+
+	fileName = fields[0]
+	sectionName = fields[1]
+
+	return fileName, sectionName, nil
+}
+
+// validHeadingIDChar is a strings.Map() function used to determine which characters
+// can appear in a heading ID.
+//
+// Letters, numbers, whitespace, '-' and '_' are returned unchanged; for any
+// other character -1 is returned, which makes strings.Map() drop it.
+func validHeadingIDChar(r rune) rune {
+	if unicode.IsLetter(r) ||
+		unicode.IsNumber(r) ||
+		unicode.IsSpace(r) ||
+		r == '-' || r == '_' {
+		return r
+	}
+
+	// Remove all other chars from destination string
+	return -1
+}
+
+// createHeadingID creates an HTML anchor name for the specified heading.
+//
+// An error is returned if headingName is empty.
+func createHeadingID(headingName string) (id string, err error) {
+	if headingName == "" {
+		return "", errors.New("need heading name")
+	}
+
+	// Munge the original heading into an id by:
+	//
+	// - removing invalid characters.
+	// - lower-casing.
+	// - replacing spaces with hyphens.
+	//
+	// Note that only the space character is replaced: other whitespace that
+	// validHeadingIDChar lets through (such as tabs) is left as-is.
+	id = strings.Map(validHeadingIDChar, headingName)
+
+	id = strings.ToLower(id)
+	id = strings.ReplaceAll(id, " ", "-")
+
+	return id, nil
+}
+
+// checkNode returns an error if the specified node is nil or is not of the
+// expected type.
+func checkNode(node *bf.Node, expectedType bf.NodeType) error {
+	if node == nil {
+		return errors.New("node cannot be nil")
+	}
+
+	if node.Type != expectedType {
+		return fmt.Errorf("expected %v node, found %v", expectedType, node.Type)
+	}
+
+	return nil
+}
diff --git a/tests/cmd/check-markdown/utils_test.go b/tests/cmd/check-markdown/utils_test.go
new file mode 100644
index 0000000000..c9899fcae5
--- /dev/null
+++ b/tests/cmd/check-markdown/utils_test.go
@@ -0,0 +1,149 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSplitLink(t *testing.T) {
+	assert := assert.New(t)
+
+	type testData struct {
+		linkName string
+		file     string
+		section  string
+		valid    bool
+	}
+
+	data := []testData{
+		{"", "", "", false},
+
+		// More than one anchor character is invalid
+		{"foo.md#bar#baz", "", "", false},
+
+		{"foo.md", "foo.md", "", true},
+		{"#bar", "", "bar", true},
+		{"foo.md#bar", "foo.md", "bar", true},
+		{"foo.md%%bar", "foo.md%%bar", "", true},
+	}
+
+	for i, d := range data {
+		file, section, err := splitLink(d.linkName)
+
+		if d.valid {
+			assert.NoErrorf(err, "test[%d]: %+v", i, d)
+			assert.Equal(d.file, file, "test[%d]: %+v", i, d)
+			assert.Equal(d.section, section, "test[%d]: %+v", i, d)
+		} else {
+			assert.Errorf(err, "test[%d]: %+v", i, d)
+		}
+	}
+}
+
+func TestValidHeadingIDChar(t *testing.T) {
+	assert := assert.New(t)
+
+	type testData struct {
+		ch    rune
+		valid bool
+	}
+
+	data := []testData{
+		{' ', true},
+		{'\t', true},
+		{'\n', true},
+
+		{'a', true},
+		{'z', true},
+		{'A', true},
+		{'Z', true},
+
+		{'0', true},
+		{'9', true},
+
+		{'-', true},
+		{'_', true},
+
+		{'\000', false},
+		{'\001', false},
+	}
+
+	for i, d := range data {
+		result := validHeadingIDChar(d.ch)
+
+		var outcome bool
+
+		if d.valid {
+			outcome = result != -1
+		} else {
+			outcome = result == -1
+		}
+
+		assert.Truef(outcome, "test[%d]: %+v", i, d)
+	}
+
+	// the main list of invalid chars to test
+	invalid := "!@#$%^&*()+=[]{}\\|:\";'<>?,./"
+
+	for i, ch := range invalid {
+		result := validHeadingIDChar(ch)
+
+		outcome := result == -1
+
+		assert.Truef(outcome, "invalid[%d]: %+v", i, ch)
+	}
+}
+
+func TestCreateHeadingID(t *testing.T) {
+	assert := assert.New(t)
+
+	type testData struct {
+		heading     string
+		id          string
+		expectError bool
+	}
+
+	data := []testData{
+		{"", "", true},
+		{"a", "a", false},
+		{"a.b/c:d", "abcd", false},
+		{"a ?", "a-", false},
+		{"a !?!", "a-", false},
+		{"foo", "foo", false},
+		{"foo bar", "foo-bar", false},
+		{"foo_bar", "foo_bar", false},
+		{"foo_bar()", "foo_bar", false},
+		{"`foo_bar()`", "foo_bar", false},
+		{"foo_bar()baz", "foo_barbaz", false},
+		{"Stability or Performance?", "stability-or-performance", false},
+		{"Hello - World", "hello---world", false},
+		{"metrics_json_init()", "metrics_json_init", false},
+		{"metrics_json_add_array_element(json)", "metrics_json_add_array_elementjson", false},
+		{"What is it ?", "what-is-it-", false},
+		{"Sandbox `DeviceInfo`", "sandbox-deviceinfo", false},
+		{"Build a custom QEMU for aarch64/arm64 - REQUIRED", "build-a-custom-qemu-for-aarch64arm64---required", false},
+		{"docker --net=host", "docker---nethost", false},
+		{"Containerd Runtime V2 API (Shim V2 API)", "containerd-runtime-v2-api-shim-v2-api", false},
+		{"Containerd Runtime V2 API: Shim V2 API", "containerd-runtime-v2-api-shim-v2-api", false},
+		{"Launch i3.metal instance", "launch-i3metal-instance", false},
+		{"Deploy!", "deploy", false},
+	}
+
+	for i, d := range data {
+		id, err := createHeadingID(d.heading)
+
+		msg := fmt.Sprintf("test[%d]: %+v, id: %q\n", i, d, id)
+
+		if d.expectError {
+			assert.Error(err, msg)
+			continue
+		}
+
+		// The success path must not return an error either.
+		assert.NoError(err, msg)
+		assert.Equal(d.id, id, msg)
+	}
+}