Files
luet/vendor/mvdan.cc/sh/v3/pattern/pattern.go
2019-12-16 23:56:58 +01:00

296 lines
7.1 KiB
Go

// Copyright (c) 2017, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
// Package pattern allows working with shell pattern matching notation, also
// known as wildcards or globbing.
//
// For reference, see
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13.
package pattern
import (
"bytes"
"fmt"
"regexp"
"strconv"
"strings"
)
// Mode is a set of bit flags that adjusts how Regexp translates a pattern.
// Each flag occupies its own bit, so flags can be combined with bitwise OR;
// a Mode of 0 enables none of them.
//
// TODO: support Mode in the other APIs too
type Mode uint
const (
// Shortest makes Regexp append "?" to the translation of each "*", turning
// the generated quantifier into a non-greedy one.
Shortest Mode = 1 << iota // prefer the shortest match.
// Filenames makes Regexp translate "*" to "[^/]*" and "?" to "[^/]", while
// "**" becomes ".*" and "**/" becomes "(.*/|)". A "[" that is followed by a
// "/" before any "]" is emitted as a literal "[".
Filenames // "*" and "?" don't match slashes; only "**" does
// Braces makes Regexp translate brace alternations and numeric ranges into
// non-capturing groups; without it, "{" is quoted as a literal character.
Braces // support "{a,b}" and "{1..4}"
)
// numRange matches the tail of a numeric brace range, such as "1..4}" or
// "-3..+7}": two optionally signed decimal integers separated by ".." and
// followed by a closing brace. It is anchored at the start of its input, and
// Regexp applies it to the text right after an opening "{". The two integers
// are captured as submatches 1 and 2.
var numRange = regexp.MustCompile(`^([+-]?\d+)\.\.([+-]?\d+)}`)
// Regexp turns a shell pattern into a regular expression that can be used with
// regexp.Compile. It will return an error if the input pattern was incorrect.
// Otherwise, the returned expression can be passed to regexp.MustCompile.
//
// The mode argument is a bit set of Mode flags; pass 0 to enable none of them.
// For example, Regexp(`foo*bar?`, 0) returns `foo.*bar.`.
//
// The pattern is processed byte by byte. Bytes that belong to a multi-byte
// UTF-8 sequence are copied to the output unchanged, both when they appear on
// their own and when they follow a backslash, so non-ASCII characters survive
// the translation intact.
//
// Note that this function (and QuoteMeta) should not be directly used with file
// paths if Windows is supported, as the path separator on that platform is the
// same character as the escaping character for shell patterns.
func Regexp(pat string, mode Mode) (string, error) {
	// Short-cut without a string copy: if the pattern contains neither shell
	// metacharacters nor regular expression metacharacters that would need
	// escaping, it already is its own regular expression.
	if !strings.ContainsAny(pat, `*?[\.+()|]{}^$`) {
		return pat, nil
	}

	// closingBraces is a stack with the positions of the "}" bytes that close
	// the brace alternations which are currently open.
	var closingBraces []int
	var buf bytes.Buffer
writeLoop:
	for i := 0; i < len(pat); i++ {
		switch c := pat[i]; c {
		case '*':
			if mode&Filenames != 0 {
				if i++; i < len(pat) && pat[i] == '*' {
					if i++; i < len(pat) && pat[i] == '/' {
						// "**/" matches any number of directories,
						// including none at all.
						buf.WriteString("(.*/|)")
					} else {
						buf.WriteString(".*")
						i-- // the byte after "**" was not consumed
					}
				} else {
					buf.WriteString("[^/]*")
					i-- // the byte after "*" was not consumed
				}
			} else {
				buf.WriteString(".*")
			}
			if mode&Shortest != 0 {
				buf.WriteByte('?')
			}
		case '?':
			if mode&Filenames != 0 {
				buf.WriteString("[^/]")
			} else {
				buf.WriteByte('.')
			}
		case '\\':
			if i++; i >= len(pat) {
				return "", fmt.Errorf(`\ at end of pattern`)
			}
			// The escaped byte may be the first byte of a multi-byte
			// character, so it must not be converted to a rune.
			writeLiteralByte(&buf, pat[i])
		case '[':
			name, err := charClass(pat[i:])
			if err != nil {
				return "", err
			}
			if name != "" {
				// A character class like "[[:alpha:]]" is copied verbatim.
				buf.WriteString(name)
				i += len(name) - 1
				break
			}
			if mode&Filenames != 0 {
				// If a slash shows up before the first "]", emit the "["
				// as a literal character instead of opening a bracket
				// expression.
				for _, r := range pat[i:] {
					if r == ']' {
						break
					} else if r == '/' {
						buf.WriteString("\\[")
						continue writeLoop
					}
				}
			}
			buf.WriteByte(c)
			if i++; i >= len(pat) {
				return "", fmt.Errorf("[ was not matched with a closing ]")
			}
			// Both "!" and "^" negate the bracket expression.
			switch c = pat[i]; c {
			case '!', '^':
				buf.WriteByte('^')
				if i++; i >= len(pat) {
					return "", fmt.Errorf("[ was not matched with a closing ]")
				}
			}
			// A "]" in the first position is a member of the set rather
			// than its end.
			if c = pat[i]; c == ']' {
				buf.WriteByte(']')
				if i++; i >= len(pat) {
					return "", fmt.Errorf("[ was not matched with a closing ]")
				}
			}
			// rangeStart holds the byte before a "-" while the byte after
			// it is pending, and is zero otherwise.
			rangeStart := byte(0)
		loopBracket:
			for ; i < len(pat); i++ {
				c = pat[i]
				buf.WriteByte(c)
				switch c {
				case '\\':
					if i++; i < len(pat) {
						buf.WriteByte(pat[i])
					}
					continue
				case ']':
					break loopBracket
				}
				if rangeStart != 0 && rangeStart > c {
					return "", fmt.Errorf("invalid range: %c-%c", rangeStart, c)
				}
				if c == '-' {
					rangeStart = pat[i-1]
				} else {
					rangeStart = 0
				}
			}
			if i >= len(pat) {
				return "", fmt.Errorf("[ was not matched with a closing ]")
			}
		case '{':
			if mode&Braces == 0 {
				writeLiteralByte(&buf, c)
				break
			}
			// Look ahead for the matching "}" to tell an alternation such
			// as "{a,b}" apart from a numeric range or a literal brace.
			innerLevel := 1
			commas := false
		peekBrace:
			for j := i + 1; j < len(pat); j++ {
				switch pat[j] {
				case '{':
					innerLevel++
				case ',':
					commas = true
				case '\\':
					j++ // skip the escaped byte
				case '}':
					if innerLevel--; innerLevel > 0 {
						continue
					}
					if !commas {
						break peekBrace
					}
					closingBraces = append(closingBraces, j)
					buf.WriteString("(?:")
					continue writeLoop
				}
			}
			if match := numRange.FindStringSubmatch(pat[i+1:]); len(match) == 3 {
				start, err1 := strconv.Atoi(match[1])
				end, err2 := strconv.Atoi(match[2])
				if err1 != nil || err2 != nil || start > end {
					return "", fmt.Errorf("invalid range: %q", match[0])
				}
				// TODO: can we do better here?
				buf.WriteString("(?:")
				for n := start; n <= end; n++ {
					if n > start {
						buf.WriteByte('|')
					}
					buf.WriteString(strconv.Itoa(n))
				}
				buf.WriteByte(')')
				// match[0] includes the closing brace, so this leaves i
				// on the "}" and the loop increment steps past it.
				i += len(match[0])
				break
			}
			writeLiteralByte(&buf, c)
		case ',':
			if len(closingBraces) == 0 {
				writeLiteralByte(&buf, c)
			} else {
				// Inside a brace alternation, commas separate the options.
				buf.WriteByte('|')
			}
		case '}':
			if len(closingBraces) > 0 && closingBraces[len(closingBraces)-1] == i {
				buf.WriteByte(')')
				closingBraces = closingBraces[:len(closingBraces)-1]
			} else {
				writeLiteralByte(&buf, c)
			}
		default:
			writeLiteralByte(&buf, c)
		}
	}
	return buf.String(), nil
}

// writeLiteralByte appends c to buf in a form that matches that byte literally
// in a regular expression. ASCII bytes are quoted with regexp.QuoteMeta, while
// bytes of multi-byte UTF-8 sequences (0x80 and above) are copied unchanged,
// since converting a single such byte to a string would re-encode it as a
// different character.
func writeLiteralByte(buf *bytes.Buffer, c byte) {
	const runeSelf = 0x80 // bytes below this value are complete ASCII characters
	if c >= runeSelf {
		buf.WriteByte(c)
		return
	}
	buf.WriteString(regexp.QuoteMeta(string(c)))
}
// charClass inspects the start of s, which is expected to begin at a "["
// byte, looking for a named character class such as "[[:alpha:]]".
//
// If s begins with a known class, the full text of that class (from "[[:" up
// to and including ":]]") is returned. If s does not begin with "[[:", an
// empty string and a nil error are returned. An error is returned for the
// collating forms "[[." and "[[=", for a "[[:" with no ":]]" after it, and
// for a class name that is not in the known list.
func charClass(s string) (string, error) {
	const (
		classOpen  = "[[:"
		classClose = ":]]"
	)
	switch {
	case strings.HasPrefix(s, "[[."), strings.HasPrefix(s, "[[="):
		return "", fmt.Errorf("collating features not available")
	case !strings.HasPrefix(s, classOpen):
		return "", nil
	}

	rest := s[len(classOpen):]
	nameLen := strings.Index(rest, classClose)
	if nameLen < 0 {
		return "", fmt.Errorf("[[: was not matched with a closing :]]")
	}
	class := rest[:nameLen]

	known := [...]string{
		"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
		"lower", "print", "punct", "space", "upper", "word", "xdigit",
	}
	for _, candidate := range known {
		if class == candidate {
			// Opening marker, class name, and closing marker.
			return s[:len(classOpen)+nameLen+len(classClose)], nil
		}
	}
	return "", fmt.Errorf("invalid character class: %q", class)
}
// HasMeta reports whether pat contains at least one unescaped pattern
// metacharacter, that is, one of '*', '?', or '['. A backslash escapes the
// byte that follows it, so that byte is never counted as a metacharacter.
// When the result is false, the pattern can match at most one string.
//
// For example, HasMeta(`foo\*bar`) returns false, but HasMeta(`foo*bar`)
// returns true.
//
// This is useful to skip extra work, such as calling Regexp. It cannot be used
// to decide whether QuoteMeta is needed, because QuoteMeta quotes backslashes
// while this function only skips over them.
func HasMeta(pat string) bool {
	for pos := 0; pos < len(pat); pos++ {
		b := pat[pos]
		if b == '\\' {
			// Step over the escaped byte; the loop increment then moves
			// on to whatever follows it.
			pos++
			continue
		}
		if b == '*' || b == '?' || b == '[' {
			return true
		}
	}
	return false
}
// QuoteMeta returns a string that quotes all pattern metacharacters in the
// given text. The returned string is a pattern that matches the literal text.
//
// The quoted metacharacters are '*', '?', '[', and '\'; each one is preceded
// by a backslash. Every other byte is copied to the result unchanged,
// including bytes that do not form valid UTF-8. If the text contains no
// metacharacters, it is returned as is, without a copy.
//
// For example, QuoteMeta(`foo*bar?`) returns `foo\*bar\?`.
func QuoteMeta(pat string) string {
	first := strings.IndexAny(pat, `*?[\`)
	if first < 0 { // short-cut without a string copy
		return pat
	}
	var buf bytes.Buffer
	buf.Grow(len(pat) + 1) // at least one backslash will be added
	buf.WriteString(pat[:first])
	// Iterate over bytes rather than runes: all metacharacters are ASCII,
	// which never occurs inside a multi-byte UTF-8 sequence, and copying
	// bytes keeps invalid UTF-8 input intact instead of replacing it with
	// U+FFFD.
	for i := first; i < len(pat); i++ {
		c := pat[i]
		switch c {
		case '*', '?', '[', '\\':
			buf.WriteByte('\\')
		}
		buf.WriteByte(c)
	}
	return buf.String()
}