mirror of
https://github.com/mudler/luet.git
synced 2025-06-24 06:28:46 +00:00
Instead of using gox on one side and an action to release, we can merge them together with goreleaser which will build for extra targets (arm, mips if needed in the future) and it also takes care of creating checksums, a source archive, and a changelog and creating a release with all the artifacts. All binaries should respect the old naming convention, so any scripts out there should still work. Signed-off-by: Itxaka <igarcia@suse.com>
598 lines
15 KiB
Go
598 lines
15 KiB
Go
// Copyright 2013 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package language
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"sort"
|
|
|
|
"golang.org/x/text/internal/tag"
|
|
)
|
|
|
|
// isAlpha reports whether b is a letter rather than a digit.
// The caller must guarantee that b is an ASCII letter or digit; under that
// precondition anything that sorts after '9' is a letter.
func isAlpha(b byte) bool {
	return '9' < b
}
|
|
|
|
// isAlphaNum reports whether every byte of s is an ASCII letter or digit.
// An empty slice is considered alphanumeric.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
|
|
|
|
// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// The scanner also records it when it drops a malformed token or a
// trailing separator.
// TODO: return the position at which the syntax error occurred?
var ErrSyntax = errors.New("language: tag is not well-formed")

// ErrDuplicateKey is returned when a tag contains the same key twice with
// different values in the -u section.
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
|
|
|
|
// ValueError is the error reported by the parsing functions for a subtag
// that is syntactically well-formed but is not a recognized value.
type ValueError struct {
	// v holds the offending subtag, zero-padded to eight bytes.
	v [8]byte
}

// NewValueError creates a new ValueError for the given subtag.
// Only the first eight bytes of tag are retained.
func NewValueError(tag []byte) ValueError {
	var buf [8]byte
	copy(buf[:], tag)
	return ValueError{v: buf}
}

// tag returns the stored subtag with the zero padding stripped.
func (e ValueError) tag() []byte {
	if n := bytes.IndexByte(e.v[:], 0); n >= 0 {
		return e.v[:n]
	}
	// No padding byte: the subtag fills the whole array.
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	subtag := e.tag()
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", subtag)
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	subtag := e.tag()
	return string(subtag)
}
|
|
|
|
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The scanning routines work on b in place and may rewrite, shrink or grow
// it while the tag is being normalized.
type scanner struct {
	b     []byte                      // buffer being scanned; modified by the scanner routines
	bytes [max99thPercentileSize]byte // inline storage that makeScannerString uses to back b for short inputs
	token []byte                      // current token, a sub-slice of b; nil when scan found none
	start int                         // start position of the current token
	end   int                         // end position of the current token
	next  int                         // next point for scan
	err   error                       // error recorded through setError
	done  bool                        // set by scan once the input is exhausted
}
|
|
|
|
func makeScannerString(s string) scanner {
|
|
scan := scanner{}
|
|
if len(s) <= len(scan.bytes) {
|
|
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
|
} else {
|
|
scan.b = []byte(s)
|
|
}
|
|
scan.init()
|
|
return scan
|
|
}
|
|
|
|
// makeScanner returns a scanner using b as the input buffer.
|
|
// b is not copied and may be modified by the scanner routines.
|
|
func makeScanner(b []byte) scanner {
|
|
scan := scanner{b: b}
|
|
scan.init()
|
|
return scan
|
|
}
|
|
|
|
func (s *scanner) init() {
|
|
for i, c := range s.b {
|
|
if c == '_' {
|
|
s.b[i] = '-'
|
|
}
|
|
}
|
|
s.scan()
|
|
}
|
|
|
|
// restToLower converts the string between start and end to lower case.
|
|
func (s *scanner) toLower(start, end int) {
|
|
for i := start; i < end; i++ {
|
|
c := s.b[i]
|
|
if 'A' <= c && c <= 'Z' {
|
|
s.b[i] += 'a' - 'A'
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *scanner) setError(e error) {
|
|
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
|
|
s.err = e
|
|
}
|
|
}
|
|
|
|
// resizeRange shrinks or grows the array at position oldStart such that
|
|
// a new string of size newSize can fit between oldStart and oldEnd.
|
|
// Sets the scan point to after the resized range.
|
|
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
|
|
s.start = oldStart
|
|
if end := oldStart + newSize; end != oldEnd {
|
|
diff := end - oldEnd
|
|
var b []byte
|
|
if n := len(s.b) + diff; n > cap(s.b) {
|
|
b = make([]byte, n)
|
|
copy(b, s.b[:oldStart])
|
|
} else {
|
|
b = s.b[:n]
|
|
}
|
|
copy(b[end:], s.b[oldEnd:])
|
|
s.b = b
|
|
s.next = end + (s.next - s.end)
|
|
s.end = end
|
|
}
|
|
}
|
|
|
|
// replace replaces the current token with repl.
|
|
func (s *scanner) replace(repl string) {
|
|
s.resizeRange(s.start, s.end, len(repl))
|
|
copy(s.b[s.start:], repl)
|
|
}
|
|
|
|
// gobble removes the current token from the input.
|
|
// Caller must call scan after calling gobble.
|
|
func (s *scanner) gobble(e error) {
|
|
s.setError(e)
|
|
if s.start == 0 {
|
|
s.b = s.b[:+copy(s.b, s.b[s.next:])]
|
|
s.end = 0
|
|
} else {
|
|
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
|
|
s.end = s.start - 1
|
|
}
|
|
s.next = s.start
|
|
}
|
|
|
|
// deleteRange removes the given range from s.b before the current token.
|
|
func (s *scanner) deleteRange(start, end int) {
|
|
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
|
|
diff := end - start
|
|
s.next -= diff
|
|
s.start -= diff
|
|
s.end -= diff
|
|
}
|
|
|
|
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
//
// On success s.token holds the new token and s.start and s.end delimit it
// in s.b. When the input is exhausted s.token is left nil and s.done is set.
func (s *scanner) scan() (end int) {
	// The end of the previous token is what gets returned on every path.
	end = s.end
	s.token = nil
	for s.start = s.next; s.next < len(s.b); {
		i := bytes.IndexByte(s.b[s.next:], '-')
		if i == -1 {
			// No further separator: the token runs to the end of the buffer.
			s.end = len(s.b)
			s.next = len(s.b)
			i = s.end - s.start
		} else {
			s.end = s.next + i
			s.next = s.end + 1 // step over the separator
		}
		token := s.b[s.start:s.end]
		if i < 1 || i > 8 || !isAlphaNum(token) {
			// Empty, over-long or non-alphanumeric token: drop it and retry.
			// gobble resets s.next to s.start, so the loop rescans in place.
			s.gobble(ErrSyntax)
			continue
		}
		s.token = token
		return end
	}
	// Out of input. A dangling separator is a syntax error and is trimmed.
	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
		s.setError(ErrSyntax)
		s.b = s.b[:len(s.b)-1]
	}
	s.done = true
	return end
}
|
|
|
|
// acceptMinSize parses multiple tokens of the given size or greater.
|
|
// It returns the end position of the last token consumed.
|
|
func (s *scanner) acceptMinSize(min int) (end int) {
|
|
end = s.end
|
|
s.scan()
|
|
for ; len(s.token) >= min; s.scan() {
|
|
end = s.end
|
|
}
|
|
return end
|
|
}
|
|
|
|
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
|
// failed it returns an error and any part of the tag that could be parsed.
|
|
// If parsing succeeded but an unknown value was found, it returns
|
|
// ValueError. The Tag returned in this case is just stripped of the unknown
|
|
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
|
// and extensions to this standard defined in
|
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
|
func Parse(s string) (t Tag, err error) {
|
|
// TODO: consider supporting old-style locale key-value pairs.
|
|
if s == "" {
|
|
return Und, ErrSyntax
|
|
}
|
|
if len(s) <= maxAltTaglen {
|
|
b := [maxAltTaglen]byte{}
|
|
for i, c := range s {
|
|
// Generating invalid UTF-8 is okay as it won't match.
|
|
if 'A' <= c && c <= 'Z' {
|
|
c += 'a' - 'A'
|
|
} else if c == '_' {
|
|
c = '-'
|
|
}
|
|
b[i] = byte(c)
|
|
}
|
|
if t, ok := grandfathered(b); ok {
|
|
return t, nil
|
|
}
|
|
}
|
|
scan := makeScannerString(s)
|
|
return parse(&scan, s)
|
|
}
|
|
|
|
// parse builds a Tag from the tokens of scan, whose first token must
// already have been scanned. s is the original input string; it is reused
// as the Tag's string form when it compares equal to the normalized buffer.
// It returns the parsed Tag together with the error recorded on scan, or
// Und and ErrSyntax when the first subtag cannot start a tag.
func parse(scan *scanner, s string) (t Tag, err error) {
	t = Und
	var end int
	if n := len(scan.token); n <= 1 {
		scan.toLower(0, len(scan.b))
		// A tag may only begin with a one-character subtag if that subtag
		// is the private use singleton 'x'.
		if n == 0 || scan.token[0] != 'x' {
			return t, ErrSyntax
		}
		end = parseExtensions(scan)
	} else if n >= 4 {
		// First subtags of four or more characters are rejected outright.
		return Und, ErrSyntax
	} else { // the usual case
		t, end = parseTag(scan)
		if n := len(scan.token); n == 1 {
			// A singleton follows the tag: extensions start at end.
			t.pExt = uint16(end)
			end = parseExtensions(scan)
		} else if end < len(scan.b) {
			// Trailing input that could not be parsed: flag it and cut it off.
			scan.setError(ErrSyntax)
			scan.b = scan.b[:end]
		}
	}
	if int(t.pVariant) < len(scan.b) {
		// The buffer extends past pVariant, so the string form is stored.
		if end < len(s) {
			s = s[:end]
		}
		// Keep the caller's string when it already matches the normalized
		// buffer; otherwise convert the buffer.
		if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
			t.str = s
		} else {
			t.str = string(scan.b)
		}
	} else {
		t.pVariant, t.pExt = 0, 0
	}
	return t, scan.err
}
|
|
|
|
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
//
// scan must be positioned on the language subtag. The language and region
// subtags in scan.b are replaced by the String form of the IDs they resolve
// to; subtags that do not resolve are gobbled and their error is recorded
// on scan.
func parseTag(scan *scanner) (t Tag, end int) {
	var e error
	// TODO: set an error if an unknown lang, script or region is encountered.
	t.LangID, e = getLangID(scan.token)
	scan.setError(e)
	scan.replace(t.LangID.String())
	langStart := scan.start
	end = scan.scan()
	// Extended language subtags: three-character tokens starting with a letter.
	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
		// to a tag of the form <extlang>.
		lang, e := getLangID(scan.token)
		if lang != 0 {
			// Write the extlang over the primary language in the buffer and
			// move scan.start just behind it, so that the gobble below removes
			// everything from there up to the end of the extlang token.
			t.LangID = lang
			copy(scan.b[langStart:], lang.String())
			scan.b[langStart+3] = '-'
			scan.start = langStart + 4
		}
		scan.gobble(e)
		end = scan.scan()
	}
	// Script: a four-character token starting with a letter.
	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
		t.ScriptID, e = getScriptID(script, scan.token)
		if t.ScriptID == 0 {
			scan.gobble(e)
		}
		end = scan.scan()
	}
	// Region: a token of two or three characters.
	if n := len(scan.token); n >= 2 && n <= 3 {
		t.RegionID, e = getRegionID(scan.token)
		if t.RegionID == 0 {
			scan.gobble(e)
		} else {
			scan.replace(t.RegionID.String())
		}
		end = scan.scan()
	}
	// Everything from the current token onwards is handled in lower case.
	scan.toLower(scan.start, len(scan.b))
	// pVariant is the end of the language/script/region part; pExt is the
	// end after the variants.
	t.pVariant = byte(end)
	end = parseVariants(scan, end, t)
	t.pExt = uint16(end)
	return t, end
}
|
|
|
|
// separator is the '-' delimiter passed to bytes.Join when subtags are
// re-assembled after sorting.
var separator = []byte{'-'}
|
|
|
|
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
//
// end is the end position of the input parsed so far; the return value is
// the end position after the accepted variants. Unknown variants are gobbled
// and recorded on scan as a ValueError. Variants that are out of order are
// sorted by their variantIndex value and rewritten in the buffer.
// The parameter t is not used.
func parseVariants(scan *scanner, end int, t Tag) int {
	start := scan.start
	// Backing arrays sized for up to four variants; append reallocates
	// if more are found.
	varIDBuf := [4]uint8{}
	variantBuf := [4][]byte{}
	varID := varIDBuf[:0]
	variant := variantBuf[:0]
	// last is the highest index seen while the variants are still in
	// strictly ascending order; needSort is set once that order is broken.
	last := -1
	needSort := false
	for ; len(scan.token) >= 4; scan.scan() {
		// TODO: measure the impact of needing this conversion and redesign
		// the data structure if there is an issue.
		v, ok := variantIndex[string(scan.token)]
		if !ok {
			// unknown variant
			// TODO: allow user-defined variants?
			scan.gobble(NewValueError(scan.token))
			continue
		}
		varID = append(varID, v)
		variant = append(variant, scan.token)
		if !needSort {
			if last < int(v) {
				last = int(v)
			} else {
				needSort = true
				// There is no legal combinations of more than 7 variants
				// (and this is by no means a useful sequence).
				const maxVariants = 8
				if len(varID) > maxVariants {
					break
				}
			}
		}
		end = scan.end
	}
	if needSort {
		sort.Sort(variantsSort{varID, variant})
		// Compact in place, keeping the first entry of each run of equal
		// indices. k counts the entries kept, l is the last index kept.
		k, l := 0, -1
		for i, v := range varID {
			w := int(v)
			if l == w {
				// Remove duplicates.
				continue
			}
			varID[k] = varID[i]
			variant[k] = variant[i]
			k++
			l = w
		}
		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
			// Nothing left to write; presumably this backs up over the
			// separator in front of start.
			end = start - 1
		} else {
			// Replace the variant section of the buffer with the sorted,
			// de-duplicated list.
			scan.resizeRange(start, end, len(str))
			copy(scan.b[scan.start:], str)
			end = scan.end
		}
	}
	return end
}
|
|
|
|
// variantsSort implements sort.Interface over two parallel slices: it
// orders by the variant indices in i and keeps the variant strings in v
// aligned with them.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len returns the number of variants.
func (s variantsSort) Len() int {
	return len(s.i)
}

// Swap exchanges the entries at positions i and j in both slices.
func (s variantsSort) Swap(i, j int) {
	idI, idJ := s.i[i], s.i[j]
	s.i[i], s.i[j] = idJ, idI
	strI, strJ := s.v[i], s.v[j]
	s.v[i], s.v[j] = strJ, strI
}

// Less reports whether the variant index at i is smaller than the one at j.
func (s variantsSort) Less(i, j int) bool {
	return s.i[j] > s.i[i]
}
|
|
|
|
// bytesSort implements sort.Interface for a slice of byte slices, ordering
// the elements by their first n bytes only. Every element must be at least
// n bytes long.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len returns the number of elements.
func (s bytesSort) Len() int {
	return len(s.b)
}

// Swap exchanges the elements at positions i and j.
func (s bytesSort) Swap(i, j int) {
	left, right := s.b[i], s.b[j]
	s.b[i], s.b[j] = right, left
}

// Less reports whether element i sorts before element j when only the
// first n bytes are taken into account.
func (s bytesSort) Less(i, j int) bool {
	x, y := s.b[i], s.b[j]
	for k := 0; k < s.n; k++ {
		if x[k] != y[k] {
			return x[k] < y[k]
		}
	}
	return false
}
|
|
|
|
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
//
// scan must be positioned on the first extension singleton. Extensions are
// reordered by their singleton, with a private use ('x') extension always
// placed last; extensions that are too short are flagged with ErrSyntax
// and dropped.
func parseExtensions(scan *scanner) int {
	start := scan.start
	exts := [][]byte{}
	private := []byte{}
	end := scan.end
	for len(scan.token) == 1 {
		extStart := scan.start
		ext := scan.token[0]
		end = parseExtension(scan)
		extension := scan.b[extStart:end]
		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
			// Singleton without a long enough subtag behind it: at least one
			// character is required for 'x', two for any other singleton.
			scan.setError(ErrSyntax)
			end = extStart
			continue
		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
			// The first extension is private use or reaches the end of the
			// buffer, so there is nothing to reorder.
			scan.b = scan.b[:end]
			return end
		} else if ext == 'x' {
			// Private use ends extension parsing; it is appended after sorting.
			private = extension
			break
		}
		exts = append(exts, extension)
	}
	// Order the extensions by their first byte, the singleton.
	sort.Sort(bytesSort{exts, 1})
	if len(private) > 0 {
		exts = append(exts, private)
	}
	// Rebuild the extension part of the buffer from the ordered list.
	scan.b = scan.b[:start]
	if len(exts) > 0 {
		scan.b = append(scan.b, bytes.Join(exts, separator)...)
	} else if start > 0 {
		// Strip trailing '-'.
		scan.b = scan.b[:start-1]
	}
	return end
}
|
|
|
|
// parseExtension parses a single extension and returns the position of
// the extension end.
//
// scan must be positioned on the extension's singleton. The 'u' extension
// has its attributes and keys sorted in place, the 't' extension may carry
// an embedded language tag, 'x' accepts subtags of any length, and every
// other singleton accepts subtags of at least two characters.
func parseExtension(scan *scanner) int {
	start, end := scan.start, scan.end
	switch scan.token[0] {
	case 'u': // https://www.ietf.org/rfc/rfc6067.txt
		attrStart := end
		scan.scan()
		// Attributes: the leading subtags that are longer than two characters.
		for last := []byte{}; len(scan.token) > 2; scan.scan() {
			if bytes.Compare(scan.token, last) != -1 {
				// Attributes are unsorted. Start over from scratch.
				// Rewind to the first attribute, collect all of them and write
				// them back ordered by their first three bytes.
				p := attrStart + 1
				scan.next = p
				attrs := [][]byte{}
				for scan.scan(); len(scan.token) > 2; scan.scan() {
					attrs = append(attrs, scan.token)
					end = scan.end
				}
				sort.Sort(bytesSort{attrs, 3})
				copy(scan.b[p:], bytes.Join(attrs, separator))
				break
			}
			last = scan.token
			end = scan.end
		}
		// Scan key-type sequences. A key is of length 2 and may be followed
		// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
		var last, key []byte
		// attrEnd marks where the attributes end and the keys begin.
		for attrEnd := end; len(scan.token) == 2; last = key {
			key = scan.token
			end = scan.end
			// Consume the type subtags of this key. The end < scan.end check
			// presumably guards against the scanner not having advanced
			// after gobbling a token.
			for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
				end = scan.end
			}
			// TODO: check key value validity
			if bytes.Compare(key, last) != 1 || scan.err != nil {
				// We have an invalid key or the keys are not sorted.
				// Start scanning keys from scratch and reorder.
				p := attrEnd + 1
				scan.next = p
				keys := [][]byte{}
				for scan.scan(); len(scan.token) == 2; {
					keyStart := scan.start
					end = scan.end
					for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
						end = scan.end
					}
					// Each entry is a key together with its type subtags.
					keys = append(keys, scan.b[keyStart:end])
				}
				// Stable sort on the two-byte key keeps the input order of
				// entries that share a key.
				sort.Stable(bytesSort{keys, 2})
				if n := len(keys); n > 0 {
					// Keep the first entry of every key. A later entry with
					// the same key but a different value is an error.
					k := 0
					for i := 1; i < n; i++ {
						if !bytes.Equal(keys[k][:2], keys[i][:2]) {
							k++
							keys[k] = keys[i]
						} else if !bytes.Equal(keys[k], keys[i]) {
							scan.setError(ErrDuplicateKey)
						}
					}
					keys = keys[:k+1]
				}
				reordered := bytes.Join(keys, separator)
				// Dropping duplicates may have shortened the key section;
				// remove the surplus bytes before writing it back.
				if e := p + len(reordered); e < end {
					scan.deleteRange(e, end)
					end = e
				}
				copy(scan.b[p:], reordered)
				break
			}
		}
	case 't': // https://www.ietf.org/rfc/rfc6497.txt
		scan.scan()
		// An embedded language tag: a two or three character subtag whose
		// second character is not a digit.
		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
			_, end = parseTag(scan)
			scan.toLower(start, end)
		}
		// Fields: a two-character key whose second character is a digit,
		// followed by subtags of at least three characters.
		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
			end = scan.acceptMinSize(3)
		}
	case 'x':
		end = scan.acceptMinSize(1)
	default:
		end = scan.acceptMinSize(2)
	}
	return end
}
|
|
|
|
// getExtension returns the name, body and end position of the extension.
|
|
func getExtension(s string, p int) (end int, ext string) {
|
|
if s[p] == '-' {
|
|
p++
|
|
}
|
|
if s[p] == 'x' {
|
|
return len(s), s[p:]
|
|
}
|
|
end = nextExtension(s, p)
|
|
return end, s[p:end]
|
|
}
|
|
|
|
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the position of the
// leading '-' of that pattern, or len(s) if there is none.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		if s[p] != '-' {
			p++
			continue
		}
		if s[p+2] == '-' {
			return p
		}
		// The subtag after this separator is longer than one character,
		// so the next separator is at least three bytes ahead.
		p += 3
	}
	return len(s)
}
|