Update vendor/

This commit is contained in:
Ettore Di Giacinto
2019-10-29 17:37:49 +01:00
committed by Ettore Di Giacinto
parent 8ca6051a04
commit 9d0dc601b7
374 changed files with 103281 additions and 37 deletions

58
vendor/golang.org/x/text/internal/triegen/compact.go generated vendored Normal file
View File

@@ -0,0 +1,58 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
// This file defines Compacter and its implementations.
import "io"
// A Compacter generates an alternative, more space-efficient way to store a
// trie value block. A trie value block holds all possible values for the last
// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
type Compacter interface {
// Size returns whether the Compacter could encode the given block as well
// as its size in case it can. len(v) is always 64.
Size(v []uint64) (sz int, ok bool)
// Store stores the block using the Compacter's compression method.
// It returns a handle with which the block can be retrieved.
// len(v) is always 64.
Store(v []uint64) uint32
// Print writes the data structures associated to the given store to w.
Print(w io.Writer) error
// Handler returns the name of a function that gets called during trie
// lookup for blocks generated by the Compacter. The function should be of
// the form func (n uint32, b byte) uint64, where n is the index returned by
// the Compacter's Store method and b is the last byte of the UTF-8
// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
// block.
Handler() string
}
// simpleCompacter is the default Compacter used by builder. It implements a
// normal trie block.
type simpleCompacter builder
func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
return blockSize * b.ValueSize, true
}
func (b *simpleCompacter) Store(v []uint64) uint32 {
h := uint32(len(b.ValueBlocks) - blockOffset)
b.ValueBlocks = append(b.ValueBlocks, v)
return h
}
func (b *simpleCompacter) Print(io.Writer) error {
// Structures are printed in print.go.
return nil
}
func (b *simpleCompacter) Handler() string {
panic("Handler should be special-cased for this Compacter")
}

251
vendor/golang.org/x/text/internal/triegen/print.go generated vendored Normal file
View File

@@ -0,0 +1,251 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
import (
"bytes"
"fmt"
"io"
"strings"
"text/template"
)
// print writes all the data structures as well as the code necessary to use the
// trie to w.
func (b *builder) print(w io.Writer) error {
b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
// If we only have one root trie, all starter blocks are at position 0 and
// we can access the arrays directly.
if len(b.Trie) == 1 {
// At this point we cannot refer to the generated tables directly.
b.ASCIIBlock = b.Name + "Values"
b.StarterBlock = b.Name + "Index"
} else {
// Otherwise we need to have explicit starter indexes in the trie
// structure.
b.ASCIIBlock = "t.ascii"
b.StarterBlock = "t.utf8Start"
}
b.SourceType = "[]byte"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
b.SourceType = "string"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
if err := trieGen.Execute(w, b); err != nil {
return err
}
for _, c := range b.Compactions {
if err := c.c.Print(w); err != nil {
return err
}
}
return nil
}
func printValues(n int, values []uint64) string {
w := &bytes.Buffer{}
boff := n * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
var newline bool
for i, v := range values {
if i%6 == 0 {
newline = true
}
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
}
}
return w.String()
}
func printIndex(b *builder, nr int, n *node) string {
w := &bytes.Buffer{}
boff := nr * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
var newline bool
for i, c := range n.children {
if i%8 == 0 {
newline = true
}
if c != nil {
v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
}
}
}
return w.String()
}
var (
trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
"printValues": printValues,
"printIndex": printIndex,
"title": strings.Title,
"dec": func(x int) int { return x - 1 },
"psize": func(n int) string {
return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
},
}).Parse(trieTemplate))
lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
)
// TODO: consider the return type of lookup. It could be uint64, even if the
// internal value type is smaller. We will have to verify this with the
// performance of unicode/norm, which is very sensitive to such changes.
const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
type {{.Name}}Trie struct { {{if $multi}}
ascii []{{.ValueType}} // index for ASCII bytes
utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
{{end}}}
func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
h := {{.Name}}TrieHandles[i]
return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
}
type {{.Name}}TrieHandle struct {
ascii, multi {{.IndexType}}
}
// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
{{end}}}{{else}}
return &{{.Name}}Trie{}
}
{{end}}
// lookupValue determines the type of block n and looks up the value for b.
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
switch { {{range $i, $c := .Compactions}}
{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
n -= {{$c.Offset}}{{end}}
return {{print $b.ValueType}}({{$c.Handler}}){{end}}
}
}
// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
// The third block is the zero block.
var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
{{end}}}
// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
// Block 0 is the zero block.
var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
{{end}}}
`
// TODO: consider allowing zero-length strings after evaluating performance with
// unicode/norm.
const lookupTemplate = `
// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return {{.ASCIIBlock}}[c0], 1
case c0 < 0xC2:
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c1), 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c2), 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
o = uint32(i)<<6 + uint32(c2)
i = {{.Name}}Index[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return 0, 3 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c3), 4
}
// Illegal rune
return 0, 1
}
// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
c0 := s[0]
if c0 < 0x80 { // is ASCII
return {{.ASCIIBlock}}[c0]
}
i := {{.StarterBlock}}[c0]
if c0 < 0xE0 { // 2-byte UTF-8
return t.lookupValue(uint32(i), s[1])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
if c0 < 0xF0 { // 3-byte UTF-8
return t.lookupValue(uint32(i), s[2])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
if c0 < 0xF8 { // 4-byte UTF-8
return t.lookupValue(uint32(i), s[3])
}
return 0
}
`

494
vendor/golang.org/x/text/internal/triegen/triegen.go generated vendored Normal file
View File

@@ -0,0 +1,494 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package triegen implements a code generator for a trie for associating
// unsigned integer values with UTF-8 encoded runes.
//
// Many of the go.text packages use tries for storing per-rune information. A
// trie is especially useful if many of the runes have the same value. If this
// is the case, many blocks can be expected to be shared allowing for
// information on many runes to be stored in little space.
//
// As most of the lookups are done directly on []byte slices, the tries use the
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
// runes and contributes a little bit to better performance. It also naturally
// provides a fast path for ASCII.
//
// Space is also an issue. There are many code points defined in Unicode and as
// a result tables can get quite large. So every byte counts. The triegen
// package automatically chooses the smallest integer values to represent the
// tables. Compacters allow further compression of the trie by allowing for
// alternative representations of individual trie blocks.
//
// triegen allows generating multiple tries as a single structure. This is
// useful when, for example, one wants to generate tries for several languages
// that have a lot of values in common. Some existing libraries for
// internationalization store all per-language data as a dynamically loadable
// chunk. The go.text packages are designed with the assumption that the user
// typically wants to compile in support for all supported languages, in line
// with the approach common to Go to create a single standalone binary. The
// multi-root trie approach can give significant storage savings in this
// scenario.
//
// triegen generates both tables and code. The code is optimized to use the
// automatically chosen data types. The following code is generated for a Trie
// or multiple Tries named "foo":
// - type fooTrie
// The trie type.
//
// - func newFooTrie(x int) *fooTrie
// Trie constructor, where x is the index of the trie passed to Gen.
//
// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
// The lookup method, where uintX is automatically chosen.
//
// - func lookupString, lookupUnsafe and lookupStringUnsafe
// Variants of the above.
//
// - var fooValues and fooIndex and any tables generated by Compacters.
// The core trie data.
//
// - var fooTrieHandles
// Indexes of starter blocks in case of multiple trie roots.
//
// It is recommended that users test the generated trie by checking the returned
// value for every rune. Such exhaustive tests are possible as the number of
// runes in Unicode is limited.
package triegen // import "golang.org/x/text/internal/triegen"
// TODO: Arguably, the internally optimized data types would not have to be
// exposed in the generated API. We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.
import (
"encoding/binary"
"fmt"
"hash/crc64"
"io"
"log"
"unicode/utf8"
)
// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
type builder struct {
Name string
// ValueType is the type of the trie values looked up.
ValueType string
// ValueSize is the byte size of the ValueType.
ValueSize int
// IndexType is the type of trie index values used for all UTF-8 bytes of
// a rune except the last one.
IndexType string
// IndexSize is the byte size of the IndexType.
IndexSize int
// SourceType is used when generating the lookup functions. If the user
// requests StringSupport, all lookup functions will be generated for
// string input as well.
SourceType string
Trie []*Trie
IndexBlocks []*node
ValueBlocks [][]uint64
Compactions []compaction
Checksum uint64
ASCIIBlock string
StarterBlock string
indexBlockIdx map[uint64]int
valueBlockIdx map[uint64]nodeIndex
asciiBlockIdx map[uint64]int
// Stats are used to fill out the template.
Stats struct {
NValueEntries int
NValueBytes int
NIndexEntries int
NIndexBytes int
NHandleBytes int
}
err error
}
// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
type nodeIndex struct {
compaction int
index int
}
// compaction keeps track of stats used for the compaction.
type compaction struct {
c Compacter
blocks []*node
maxHandle uint32
totalSize int
// Used by template-based generator and thus exported.
Cutoff uint32
Offset uint32
Handler string
}
func (b *builder) setError(err error) {
if b.err == nil {
b.err = err
}
}
// An Option can be passed to Gen.
type Option func(b *builder) error
// Compact configures the trie generator to use the given Compacter.
func Compact(c Compacter) Option {
return func(b *builder) error {
b.Compactions = append(b.Compactions, compaction{
c: c,
Handler: c.Handler() + "(n, b)"})
return nil
}
}
// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie. It returns the table size of
// the generated trie.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
// The index contains two dummy blocks, followed by the zero block. The zero
// block is at offset 0x80, so that the offset for the zero block for
// continuation bytes is 0.
b := &builder{
Name: name,
Trie: tries,
IndexBlocks: []*node{{}, {}, {}},
Compactions: []compaction{{
Handler: name + "Values[n<<6+uint32(b)]",
}},
// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
// block.
indexBlockIdx: map[uint64]int{0: 0},
valueBlockIdx: map[uint64]nodeIndex{0: {}},
asciiBlockIdx: map[uint64]int{},
}
b.Compactions[0].c = (*simpleCompacter)(b)
for _, f := range opts {
if err := f(b); err != nil {
return 0, err
}
}
b.build()
if b.err != nil {
return 0, b.err
}
if err = b.print(w); err != nil {
return 0, err
}
return b.Size(), nil
}
// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
root *node
hiddenTrie
}
// hiddenTrie contains values we want to be visible to the template generator,
// but hidden from the API documentation.
type hiddenTrie struct {
Name string
Checksum uint64
ASCIIIndex int
StarterIndex int
}
// NewTrie returns a new trie root.
func NewTrie(name string) *Trie {
return &Trie{
&node{
children: make([]*node, blockSize),
values: make([]uint64, utf8.RuneSelf),
},
hiddenTrie{Name: name},
}
}
// Gen is a convenience wrapper around the Gen func passing t as the only trie
// and uses the name passed to NewTrie. It returns the size of the generated
// tables.
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
return Gen(w, t.Name, []*Trie{t}, opts...)
}
// node is a node of the intermediate trie structure.
type node struct {
// children holds this node's children. It is always of length 64.
// A child node may be nil.
children []*node
// values contains the values of this node. If it is non-nil, this node is
// either a root or leaf node:
// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
// For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
values []uint64
index nodeIndex
}
// Insert associates value with the given rune. Insert will panic if a non-zero
// value is passed for an invalid rune.
func (t *Trie) Insert(r rune, value uint64) {
if value == 0 {
return
}
s := string(r)
if []rune(s)[0] != r && value != 0 {
// Note: The UCD tables will always assign what amounts to a zero value
// to a surrogate. Allowing a zero value for an illegal rune allows
// users to iterate over [0..MaxRune] without having to explicitly
// exclude surrogates, which would be tedious.
panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
}
if len(s) == 1 {
// It is a root node value (ASCII).
t.root.values[s[0]] = value
return
}
n := t.root
for ; len(s) > 1; s = s[1:] {
if n.children == nil {
n.children = make([]*node, blockSize)
}
p := s[0] % blockSize
c := n.children[p]
if c == nil {
c = &node{}
n.children[p] = c
}
if len(s) > 2 && c.values != nil {
log.Fatalf("triegen: insert(%U): found internal node with values", r)
}
n = c
}
if n.values == nil {
n.values = make([]uint64, blockSize)
}
if n.children != nil {
log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
}
n.values[s[0]-0x80] = value
}
// Size returns the number of bytes the generated trie will take to store. It
// needs to be exported as it is used in the templates.
func (b *builder) Size() int {
// Index blocks.
sz := len(b.IndexBlocks) * blockSize * b.IndexSize
// Skip the first compaction, which represents the normal value blocks, as
// its totalSize does not account for the ASCII blocks, which are managed
// separately.
sz += len(b.ValueBlocks) * blockSize * b.ValueSize
for _, c := range b.Compactions[1:] {
sz += c.totalSize
}
// TODO: this computation does not account for the fixed overhead of a using
// a compaction, either code or data. As for data, though, the typical
// overhead of data is in the order of bytes (2 bytes for cases). Further,
// the savings of using a compaction should anyway be substantial for it to
// be worth it.
// For multi-root tries, we also need to account for the handles.
if len(b.Trie) > 1 {
sz += 2 * b.IndexSize * len(b.Trie)
}
return sz
}
func (b *builder) build() {
// Compute the sizes of the values.
var vmax uint64
for _, t := range b.Trie {
vmax = maxValue(t.root, vmax)
}
b.ValueType, b.ValueSize = getIntType(vmax)
// Compute all block allocations.
// TODO: first compute the ASCII blocks for all tries and then the other
// nodes. ASCII blocks are more restricted in placement, as they require two
// blocks to be placed consecutively. Processing them first may improve
// sharing (at least one zero block can be expected to be saved.)
for _, t := range b.Trie {
b.Checksum += b.buildTrie(t)
}
// Compute the offsets for all the Compacters.
offset := uint32(0)
for i := range b.Compactions {
c := &b.Compactions[i]
c.Offset = offset
offset += c.maxHandle + 1
c.Cutoff = offset
}
// Compute the sizes of indexes.
// TODO: different byte positions could have different sizes. So far we have
// not found a case where this is beneficial.
imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
for _, ib := range b.IndexBlocks {
if x := uint64(ib.index.index); x > imax {
imax = x
}
}
b.IndexType, b.IndexSize = getIntType(imax)
}
func maxValue(n *node, max uint64) uint64 {
if n == nil {
return max
}
for _, c := range n.children {
max = maxValue(c, max)
}
for _, v := range n.values {
if max < v {
max = v
}
}
return max
}
func getIntType(v uint64) (string, int) {
switch {
case v < 1<<8:
return "uint8", 1
case v < 1<<16:
return "uint16", 2
case v < 1<<32:
return "uint32", 4
}
return "uint64", 8
}
const (
blockSize = 64
// Subtract two blocks to offset 0x80, the first continuation byte.
blockOffset = 2
// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
rootBlockOffset = 3
)
var crcTable = crc64.MakeTable(crc64.ISO)
func (b *builder) buildTrie(t *Trie) uint64 {
n := t.root
// Get the ASCII offset. For the first trie, the ASCII block will be at
// position 0.
hasher := crc64.New(crcTable)
binary.Write(hasher, binary.BigEndian, n.values)
hash := hasher.Sum64()
v, ok := b.asciiBlockIdx[hash]
if !ok {
v = len(b.ValueBlocks)
b.asciiBlockIdx[hash] = v
b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
if v == 0 {
// Add the zero block at position 2 so that it will be assigned a
// zero reference in the lookup blocks.
// TODO: always do this? This would allow us to remove a check from
// the trie lookup, but at the expense of extra space. Analyze
// performance for unicode/norm.
b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
}
}
t.ASCIIIndex = v
// Compute remaining offsets.
t.Checksum = b.computeOffsets(n, true)
// We already subtracted the normal blockOffset from the index. Subtract the
// difference for starter bytes.
t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
return t.Checksum
}
func (b *builder) computeOffsets(n *node, root bool) uint64 {
// For the first trie, the root lookup block will be at position 3, which is
// the offset for UTF-8 non-ASCII starter bytes.
first := len(b.IndexBlocks) == rootBlockOffset
if first {
b.IndexBlocks = append(b.IndexBlocks, n)
}
// We special-case the cases where all values recursively are 0. This allows
// for the use of a zero block to which all such values can be directed.
hash := uint64(0)
if n.children != nil || n.values != nil {
hasher := crc64.New(crcTable)
for _, c := range n.children {
var v uint64
if c != nil {
v = b.computeOffsets(c, false)
}
binary.Write(hasher, binary.BigEndian, v)
}
binary.Write(hasher, binary.BigEndian, n.values)
hash = hasher.Sum64()
}
if first {
b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
}
// Compacters don't apply to internal nodes.
if n.children != nil {
v, ok := b.indexBlockIdx[hash]
if !ok {
v = len(b.IndexBlocks) - blockOffset
b.IndexBlocks = append(b.IndexBlocks, n)
b.indexBlockIdx[hash] = v
}
n.index = nodeIndex{0, v}
} else {
h, ok := b.valueBlockIdx[hash]
if !ok {
bestI, bestSize := 0, blockSize*b.ValueSize
for i, c := range b.Compactions[1:] {
if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
bestI, bestSize = i+1, sz
}
}
c := &b.Compactions[bestI]
c.totalSize += bestSize
v := c.c.Store(n.values)
if c.maxHandle < v {
c.maxHandle = v
}
h = nodeIndex{bestI, int(v)}
b.valueBlockIdx[hash] = h
}
n.index = h
}
return hash
}

371
vendor/golang.org/x/text/internal/ucd/ucd.go generated vendored Normal file
View File

@@ -0,0 +1,371 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ucd provides a parser for Unicode Character Database files, the
// format of which is defined in https://www.unicode.org/reports/tr44/. See
// https://www.unicode.org/Public/UCD/latest/ucd/ for example files.
//
// It currently does not support substitutions of missing fields.
package ucd // import "golang.org/x/text/internal/ucd"
import (
"bufio"
"errors"
"fmt"
"io"
"log"
"regexp"
"strconv"
"strings"
)
// UnicodeData.txt fields.
const (
CodePoint = iota
Name
GeneralCategory
CanonicalCombiningClass
BidiClass
DecompMapping
DecimalValue
DigitValue
NumericValue
BidiMirrored
Unicode1Name
ISOComment
SimpleUppercaseMapping
SimpleLowercaseMapping
SimpleTitlecaseMapping
)
// Parse calls f for each entry in the given reader of a UCD file. It will close
// the reader upon return. It will call log.Fatal if any error occurred.
//
// This implements the most common usage pattern of using Parser.
func Parse(r io.ReadCloser, f func(p *Parser)) {
defer r.Close()
p := New(r)
for p.Next() {
f(p)
}
if err := p.Err(); err != nil {
r.Close() // os.Exit will cause defers not to be called.
log.Fatal(err)
}
}
// An Option is used to configure a Parser.
type Option func(p *Parser)
func keepRanges(p *Parser) {
p.keepRanges = true
}
var (
// KeepRanges prevents the expansion of ranges. The raw ranges can be
// obtained by calling Range(0) on the parser.
KeepRanges Option = keepRanges
)
// The Part option register a handler for lines starting with a '@'. The text
// after a '@' is available as the first field. Comments are handled as usual.
func Part(f func(p *Parser)) Option {
return func(p *Parser) {
p.partHandler = f
}
}
// The CommentHandler option passes comments that are on a line by itself to
// a given handler.
func CommentHandler(f func(s string)) Option {
return func(p *Parser) {
p.commentHandler = f
}
}
// A Parser parses Unicode Character Database (UCD) files.
type Parser struct {
scanner *bufio.Scanner
keepRanges bool // Don't expand rune ranges in field 0.
err error
comment string
field []string
// parsedRange is needed in case Range(0) is called more than once for one
// field. In some cases this requires scanning ahead.
line int
parsedRange bool
rangeStart, rangeEnd rune
partHandler func(p *Parser)
commentHandler func(s string)
}
func (p *Parser) setError(err error, msg string) {
if p.err == nil && err != nil {
if msg == "" {
p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
} else {
p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
}
}
}
func (p *Parser) getField(i int) string {
if i >= len(p.field) {
return ""
}
return p.field[i]
}
// Err returns a non-nil error if any error occurred during parsing.
func (p *Parser) Err() error {
return p.err
}
// New returns a Parser for the given Reader.
func New(r io.Reader, o ...Option) *Parser {
p := &Parser{
scanner: bufio.NewScanner(r),
}
for _, f := range o {
f(p)
}
return p
}
// Next parses the next line in the file. It returns true if a line was parsed
// and false if it reached the end of the file.
func (p *Parser) Next() bool {
if !p.keepRanges && p.rangeStart < p.rangeEnd {
p.rangeStart++
return true
}
p.comment = ""
p.field = p.field[:0]
p.parsedRange = false
for p.scanner.Scan() && p.err == nil {
p.line++
s := p.scanner.Text()
if s == "" {
continue
}
if s[0] == '#' {
if p.commentHandler != nil {
p.commentHandler(strings.TrimSpace(s[1:]))
}
continue
}
// Parse line
if i := strings.IndexByte(s, '#'); i != -1 {
p.comment = strings.TrimSpace(s[i+1:])
s = s[:i]
}
if s[0] == '@' {
if p.partHandler != nil {
p.field = append(p.field, strings.TrimSpace(s[1:]))
p.partHandler(p)
p.field = p.field[:0]
}
p.comment = ""
continue
}
for {
i := strings.IndexByte(s, ';')
if i == -1 {
p.field = append(p.field, strings.TrimSpace(s))
break
}
p.field = append(p.field, strings.TrimSpace(s[:i]))
s = s[i+1:]
}
if !p.keepRanges {
p.rangeStart, p.rangeEnd = p.getRange(0)
}
return true
}
p.setError(p.scanner.Err(), "scanner failed")
return false
}
func parseRune(b string) (rune, error) {
if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
b = b[2:]
}
x, err := strconv.ParseUint(b, 16, 32)
return rune(x), err
}
func (p *Parser) parseRune(s string) rune {
x, err := parseRune(s)
p.setError(err, "failed to parse rune")
return x
}
// Rune parses and returns field i as a rune.
func (p *Parser) Rune(i int) rune {
if i > 0 || p.keepRanges {
return p.parseRune(p.getField(i))
}
return p.rangeStart
}
// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
add := func(s string) {
if s = strings.TrimSpace(s); len(s) > 0 {
runes = append(runes, p.parseRune(s))
}
}
for b := p.getField(i); ; {
i := strings.IndexByte(b, ' ')
if i == -1 {
add(b)
break
}
add(b[:i])
b = b[i+1:]
}
return
}
var (
errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
// reRange matches one line of a legacy rune range.
reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)
// Range parses and returns field i as a rune range. A range is inclusive at
// both ends. If the field only has one rune, first and last will be identical.
// It supports the legacy format for ranges used in UnicodeData.txt.
func (p *Parser) Range(i int) (first, last rune) {
if !p.keepRanges {
return p.rangeStart, p.rangeStart
}
return p.getRange(i)
}
func (p *Parser) getRange(i int) (first, last rune) {
b := p.getField(i)
if k := strings.Index(b, ".."); k != -1 {
return p.parseRune(b[:k]), p.parseRune(b[k+2:])
}
// The first field may not be a rune, in which case we may ignore any error
// and set the range as 0..0.
x, err := parseRune(b)
if err != nil {
// Disable range parsing henceforth. This ensures that an error will be
// returned if the user subsequently will try to parse this field as
// a Rune.
p.keepRanges = true
}
// Special case for UnicodeData that was retained for backwards compatibility.
if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
if p.parsedRange {
return p.rangeStart, p.rangeEnd
}
mf := reRange.FindStringSubmatch(p.scanner.Text())
p.line++
if mf == nil || !p.scanner.Scan() {
p.setError(errIncorrectLegacyRange, "")
return x, x
}
// Using Bytes would be more efficient here, but Text is a lot easier
// and this is not a frequent case.
ml := reRange.FindStringSubmatch(p.scanner.Text())
if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
p.setError(errIncorrectLegacyRange, "")
return x, x
}
p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
p.parsedRange = true
return p.rangeStart, p.rangeEnd
}
return x, x
}
// bools recognizes all valid UCD boolean values.
var bools = map[string]bool{
"": false,
"N": false,
"No": false,
"F": false,
"False": false,
"Y": true,
"Yes": true,
"T": true,
"True": true,
}
// Bool parses and returns field i as a boolean value.
func (p *Parser) Bool(i int) bool {
f := p.getField(i)
for s, v := range bools {
if f == s {
return v
}
}
p.setError(strconv.ErrSyntax, "error parsing bool")
return false
}
// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
p.setError(err, "error parsing int")
return int(x)
}
// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
p.setError(err, "error parsing uint")
return uint(x)
}
// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
x, err := strconv.ParseFloat(string(p.getField(i)), 64)
p.setError(err, "error parsing float")
return x
}
// String parses and returns field i as a string value.
func (p *Parser) String(i int) string {
return string(p.getField(i))
}
// Strings parses and returns field i as a space-separated list of strings.
func (p *Parser) Strings(i int) []string {
ss := strings.Split(string(p.getField(i)), " ")
for i, s := range ss {
ss[i] = strings.TrimSpace(s)
}
return ss
}
// Comment returns the comments for the current line.
func (p *Parser) Comment() string {
return string(p.comment)
}
var errUndefinedEnum = errors.New("ucd: undefined enum value")
// Enum interprets and returns field i as a value that must be one of the values
// in enum.
func (p *Parser) Enum(i int, enum ...string) string {
f := p.getField(i)
for _, s := range enum {
if f == s {
return s
}
}
p.setError(errUndefinedEnum, "error parsing enum")
return ""
}