mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-09 20:17:41 +00:00
code-generator: use cases.Title instead of strings.Title
This pulls in language.Und for basic capitalisation. Signed-off-by: Stephen Kitt <skitt@redhat.com>
This commit is contained in:
parent
9df98f3d39
commit
ecb965287c
@ -21,6 +21,8 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/cases"
|
||||||
|
"golang.org/x/text/language"
|
||||||
"k8s.io/gengo/v2/generator"
|
"k8s.io/gengo/v2/generator"
|
||||||
"k8s.io/gengo/v2/namer"
|
"k8s.io/gengo/v2/namer"
|
||||||
"k8s.io/gengo/v2/types"
|
"k8s.io/gengo/v2/types"
|
||||||
@ -43,6 +45,8 @@ type genFakeForType struct {
|
|||||||
|
|
||||||
var _ generator.Generator = &genFakeForType{}
|
var _ generator.Generator = &genFakeForType{}
|
||||||
|
|
||||||
|
// titler upper-cases the first letter of a verb type so that it can be matched
// against the method names embedded in the verb templates. cases.NoLower keeps
// the remaining letters untouched, as the strings.Title call it replaces did;
// without it a camel-case verb such as "updateStatus" would be turned into
// "Updatestatus" and no longer match its template.
//
// A Caser may be stateful, so titler must not be used from multiple goroutines.
var titler = cases.Title(language.Und, cases.NoLower)
|
||||||
|
|
||||||
// Filter ignores all but one type because we're making a single file per type.
|
// Filter ignores all but one type because we're making a single file per type.
|
||||||
func (g *genFakeForType) Filter(c *generator.Context, t *types.Type) bool { return t == g.typeToMatch }
|
func (g *genFakeForType) Filter(c *generator.Context, t *types.Type) bool { return t == g.typeToMatch }
|
||||||
|
|
||||||
@ -299,9 +303,7 @@ func (g *genFakeForType) GenerateType(c *generator.Context, t *types.Type, w io.
|
|||||||
// TODO: Make the verbs in templates parametrized so the strings.Replace() is
|
// TODO: Make the verbs in templates parametrized so the strings.Replace() is
|
||||||
// not needed.
|
// not needed.
|
||||||
func adjustTemplate(name, verbType, template string) string {
|
func adjustTemplate(name, verbType, template string) string {
|
||||||
//nolint:staticcheck
|
return strings.ReplaceAll(template, " "+titler.String(verbType), " "+name)
|
||||||
// TODO: convert this to use golang.org/x/text/cases
|
|
||||||
return strings.ReplaceAll(template, " "+strings.Title(verbType), " "+name)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// template for the struct that implements the type's interface
|
// template for the struct that implements the type's interface
|
||||||
|
@ -21,6 +21,8 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/cases"
|
||||||
|
"golang.org/x/text/language"
|
||||||
"k8s.io/gengo/v2/generator"
|
"k8s.io/gengo/v2/generator"
|
||||||
"k8s.io/gengo/v2/namer"
|
"k8s.io/gengo/v2/namer"
|
||||||
"k8s.io/gengo/v2/types"
|
"k8s.io/gengo/v2/types"
|
||||||
@ -44,6 +46,8 @@ type genClientForType struct {
|
|||||||
|
|
||||||
var _ generator.Generator = &genClientForType{}
|
var _ generator.Generator = &genClientForType{}
|
||||||
|
|
||||||
|
// titler upper-cases the first letter of a verb type so that it can be matched
// against the method names embedded in the verb templates. cases.NoLower keeps
// the remaining letters untouched, as the strings.Title call it replaces did;
// without it a camel-case verb such as "updateStatus" would be turned into
// "Updatestatus" and no longer match its template.
//
// A Caser may be stateful, so titler must not be used from multiple goroutines.
var titler = cases.Title(language.Und, cases.NoLower)
|
||||||
|
|
||||||
// Filter ignores all but one type because we're making a single file per type.
|
// Filter ignores all but one type because we're making a single file per type.
|
||||||
func (g *genClientForType) Filter(c *generator.Context, t *types.Type) bool {
|
func (g *genClientForType) Filter(c *generator.Context, t *types.Type) bool {
|
||||||
return t == g.typeToMatch
|
return t == g.typeToMatch
|
||||||
@ -119,13 +123,9 @@ func (g *genClientForType) GenerateType(c *generator.Context, t *types.Type, w i
|
|||||||
}
|
}
|
||||||
var updatedVerbtemplate string
|
var updatedVerbtemplate string
|
||||||
if _, exists := subresourceDefaultVerbTemplates[e.VerbType]; e.IsSubresource() && exists {
|
if _, exists := subresourceDefaultVerbTemplates[e.VerbType]; e.IsSubresource() && exists {
|
||||||
//nolint:staticcheck
|
updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(subresourceDefaultVerbTemplates[e.VerbType], titler.String(e.VerbType)+"(")
|
||||||
// TODO: convert this to use golang.org/x/text/cases
|
|
||||||
updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(subresourceDefaultVerbTemplates[e.VerbType], strings.Title(e.VerbType)+"(")
|
|
||||||
} else {
|
} else {
|
||||||
//nolint:staticcheck
|
updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(defaultVerbTemplates[e.VerbType], titler.String(e.VerbType)+"(")
|
||||||
// TODO: convert this to use golang.org/x/text/cases
|
|
||||||
updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(defaultVerbTemplates[e.VerbType], strings.Title(e.VerbType)+"(")
|
|
||||||
}
|
}
|
||||||
extendedMethod := extendedInterfaceMethod{
|
extendedMethod := extendedInterfaceMethod{
|
||||||
template: updatedVerbtemplate,
|
template: updatedVerbtemplate,
|
||||||
@ -348,9 +348,7 @@ func (g *genClientForType) GenerateType(c *generator.Context, t *types.Type, w i
|
|||||||
// TODO: Make the verbs in templates parametrized so the strings.Replace() is
|
// TODO: Make the verbs in templates parametrized so the strings.Replace() is
|
||||||
// not needed.
|
// not needed.
|
||||||
func adjustTemplate(name, verbType, template string) string {
|
func adjustTemplate(name, verbType, template string) string {
|
||||||
//nolint:staticcheck
|
return strings.ReplaceAll(template, " "+titler.String(verbType), " "+name)
|
||||||
// TODO: convert this to use golang.org/x/text/cases
|
|
||||||
return strings.ReplaceAll(template, " "+strings.Title(verbType), " "+name)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateInterface(defaultVerbTemplates map[string]string, tags util.Tags) string {
|
func generateInterface(defaultVerbTemplates map[string]string, tags util.Tags) string {
|
||||||
|
@ -11,6 +11,7 @@ require (
|
|||||||
github.com/google/go-cmp v0.6.0
|
github.com/google/go-cmp v0.6.0
|
||||||
github.com/google/gofuzz v1.2.0
|
github.com/google/gofuzz v1.2.0
|
||||||
github.com/spf13/pflag v1.0.5
|
github.com/spf13/pflag v1.0.5
|
||||||
|
golang.org/x/text v0.14.0
|
||||||
gopkg.in/yaml.v2 v2.4.0
|
gopkg.in/yaml.v2 v2.4.0
|
||||||
k8s.io/apimachinery v0.0.0
|
k8s.io/apimachinery v0.0.0
|
||||||
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70
|
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70
|
||||||
|
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
Normal file
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate go run gen.go gen_trieval.go
|
||||||
|
|
||||||
|
// Package cases provides general and language-specific case mappers.
|
||||||
|
package cases // import "golang.org/x/text/cases"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// References:
|
||||||
|
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
|
||||||
|
// - https://www.unicode.org/reports/tr29/
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
|
||||||
|
// - http://userguide.icu-project.org/transforms/casemappings
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
// - Case folding
|
||||||
|
// - Wide and Narrow?
|
||||||
|
// - Segmenter option for title casing.
|
||||||
|
// - ASCII fast paths
|
||||||
|
// - Encode Soft-Dotted property within trie somehow.
|
||||||
|
|
||||||
|
// A Caser transforms given input to a certain case. It implements
// transform.Transformer.
//
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
	// t is the underlying transformer; every Caser method delegates to it.
	t transform.SpanningTransformer
}
|
||||||
|
|
||||||
|
// Bytes returns a new byte slice with the result of converting b to the case
|
||||||
|
// form implemented by c.
|
||||||
|
func (c Caser) Bytes(b []byte) []byte {
|
||||||
|
b, _, _ = transform.Bytes(c.t, b)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string with the result of transforming s to the case form
|
||||||
|
// implemented by c.
|
||||||
|
func (c Caser) String(s string) string {
|
||||||
|
s, _, _ = transform.String(c.t, s)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset resets the Caser to be reused for new input after a previous call to
|
||||||
|
// Transform.
|
||||||
|
func (c Caser) Reset() { c.t.Reset() }
|
||||||
|
|
||||||
|
// Transform implements the transform.Transformer interface and transforms the
|
||||||
|
// given input to the case form implemented by c.
|
||||||
|
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
return c.t.Transform(dst, src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span implements the transform.SpanningTransformer interface.
|
||||||
|
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
return c.t.Span(src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upper returns a Caser for language-specific uppercasing.
|
||||||
|
func Upper(t language.Tag, opts ...Option) Caser {
|
||||||
|
return Caser{makeUpper(t, getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lower returns a Caser for language-specific lowercasing.
|
||||||
|
func Lower(t language.Tag, opts ...Option) Caser {
|
||||||
|
return Caser{makeLower(t, getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Title returns a Caser for language-specific title casing. It uses an
|
||||||
|
// approximation of the default Unicode Word Break algorithm.
|
||||||
|
func Title(t language.Tag, opts ...Option) Caser {
|
||||||
|
return Caser{makeTitle(t, getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fold returns a Caser that implements Unicode case folding. The returned Caser
|
||||||
|
// is stateless and safe to use concurrently by multiple goroutines.
|
||||||
|
//
|
||||||
|
// Case folding does not normalize the input and may not preserve a normal form.
|
||||||
|
// Use the collate or search package for more convenient and linguistically
|
||||||
|
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
|
||||||
|
// where security aspects are a concern.
|
||||||
|
func Fold(opts ...Option) Caser {
|
||||||
|
return Caser{makeFold(getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// An Option is used to modify the behavior of a Caser. Options are applied in
// the order given: each one receives the options value produced by the
// previous one and returns the updated value.
type Option func(o options) options
|
||||||
|
|
||||||
|
// TODO: consider these options to take a boolean as well, like FinalSigma.
|
||||||
|
// The advantage of using this approach is that other providers of a lower-case
|
||||||
|
// algorithm could set different defaults by prefixing a user-provided slice
|
||||||
|
// of options with their own. This is handy, for instance, for the precis
|
||||||
|
// package which would override the default to not handle the Greek final sigma.
|
||||||
|
|
||||||
|
// Predefined options that can be passed to the Caser constructors.
var (
	// NoLower disables the lowercasing of non-leading letters for a title
	// caser.
	NoLower Option = noLower

	// Compact omits mappings in case folding for characters that would grow the
	// input. (Unimplemented.)
	Compact Option = compact
)
|
||||||
|
|
||||||
|
// TODO: option to preserve a normal form, if applicable?
|
||||||
|
|
||||||
|
// options collects the settings produced by applying a list of Option values.
// The zero value is the default configuration.
type options struct {
	noLower bool // set by NoLower
	simple  bool // set by Compact

	// TODO: segmenter, max ignorable, alternative versions, etc.

	ignoreFinalSigma bool // set by HandleFinalSigma(false), cleared by HandleFinalSigma(true)
}
|
||||||
|
|
||||||
|
func getOpts(o ...Option) (res options) {
|
||||||
|
for _, f := range o {
|
||||||
|
res = f(res)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// noLower is the implementation of the NoLower option: it returns o with the
// noLower flag set.
func noLower(o options) options {
	o.noLower = true
	return o
}
|
||||||
|
|
||||||
|
// compact is the implementation of the Compact option: it returns o with the
// simple flag set.
func compact(o options) options {
	o.simple = true
	return o
}
|
||||||
|
|
||||||
|
// HandleFinalSigma specifies whether the special handling of Greek final sigma
|
||||||
|
// should be enabled. Unicode prescribes handling the Greek final sigma for all
|
||||||
|
// locales, but standards like IDNA and PRECIS override this default.
|
||||||
|
func HandleFinalSigma(enable bool) Option {
|
||||||
|
if enable {
|
||||||
|
return handleFinalSigma
|
||||||
|
}
|
||||||
|
return ignoreFinalSigma
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignoreFinalSigma is the Option returned by HandleFinalSigma(false): it
// returns o with the ignoreFinalSigma flag set.
func ignoreFinalSigma(o options) options {
	o.ignoreFinalSigma = true
	return o
}
|
||||||
|
|
||||||
|
// handleFinalSigma is the Option returned by HandleFinalSigma(true): it
// returns o with the ignoreFinalSigma flag cleared.
func handleFinalSigma(o options) options {
	o.ignoreFinalSigma = false
	return o
}
|
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
Normal file
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
Normal file
@ -0,0 +1,376 @@
|
|||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
import "golang.org/x/text/transform"
|
||||||
|
|
||||||
|
// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
//
// Casing operations may need more than one rune of context to decide how a rune
// should be cased. Casing implementations should call checkpoint on context
// whenever it is known to be safe to return the runes processed so far.
//
// It is recommended for implementations to not allow for more than 30 case
// ignorables as lookahead (analogous to the limit in norm) and to use state if
// unbounded lookahead is needed for cased runes.
type context struct {
	dst, src []byte // destination and source buffers of the current call
	atEOF    bool   // whether src holds the final bytes of the input

	pDst int // pDst points past the last written rune in dst.
	pSrc int // pSrc points to the start of the currently scanned rune.

	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
	nDst, nSrc int
	err        error // first error recorded; once set, next stops iterating

	sz   int  // size of current rune
	info info // case information of currently scanned rune

	// State preserved across calls to Transform.
	isMidWord bool // false if next cased letter needs to be title-cased.
}
|
||||||
|
|
||||||
|
// Reset clears the state that is preserved across calls to Transform, so that
// the next cased letter is treated as the start of a word.
func (c *context) Reset() {
	c.isMidWord = false
}
|
||||||
|
|
||||||
|
// ret returns the return values for the Transform method. It checks whether
|
||||||
|
// there were insufficient bytes in src to complete and introduces an error
|
||||||
|
// accordingly, if necessary.
|
||||||
|
func (c *context) ret() (nDst, nSrc int, err error) {
|
||||||
|
if c.err != nil || c.nSrc == len(c.src) {
|
||||||
|
return c.nDst, c.nSrc, c.err
|
||||||
|
}
|
||||||
|
// This point is only reached by mappers if there was no short destination
|
||||||
|
// buffer. This means that the source buffer was exhausted and that c.sz was
|
||||||
|
// set to 0 by next.
|
||||||
|
if c.atEOF && c.pSrc == len(c.src) {
|
||||||
|
return c.pDst, c.pSrc, nil
|
||||||
|
}
|
||||||
|
return c.nDst, c.nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
|
||||||
|
// retSpan returns the return values for the Span method. It checks whether
|
||||||
|
// there were insufficient bytes in src to complete and introduces an error
|
||||||
|
// accordingly, if necessary.
|
||||||
|
func (c *context) retSpan() (n int, err error) {
|
||||||
|
_, nSrc, err := c.ret()
|
||||||
|
return nSrc, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkpoint sets the return value buffer points for Transform to the current
|
||||||
|
// positions.
|
||||||
|
func (c *context) checkpoint() {
|
||||||
|
if c.err == nil {
|
||||||
|
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
func (c *context) unreadRune() {
	// next advances pSrc by sz, so a zero size keeps pSrc on the same rune.
	c.sz = 0
}
|
||||||
|
|
||||||
|
func (c *context) next() bool {
|
||||||
|
c.pSrc += c.sz
|
||||||
|
if c.pSrc == len(c.src) || c.err != nil {
|
||||||
|
c.info, c.sz = 0, 0
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
v, sz := trie.lookup(c.src[c.pSrc:])
|
||||||
|
c.info, c.sz = info(v), sz
|
||||||
|
if c.sz == 0 {
|
||||||
|
if c.atEOF {
|
||||||
|
// A zero size means we have an incomplete rune. If we are atEOF,
|
||||||
|
// this means it is an illegal rune, which we will consume one
|
||||||
|
// byte at a time.
|
||||||
|
c.sz = 1
|
||||||
|
} else {
|
||||||
|
c.err = transform.ErrShortSrc
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeBytes adds bytes to dst.
|
||||||
|
func (c *context) writeBytes(b []byte) bool {
|
||||||
|
if len(c.dst)-c.pDst < len(b) {
|
||||||
|
c.err = transform.ErrShortDst
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// This loop is faster than using copy.
|
||||||
|
for _, ch := range b {
|
||||||
|
c.dst[c.pDst] = ch
|
||||||
|
c.pDst++
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeString writes the given string to dst.
|
||||||
|
func (c *context) writeString(s string) bool {
|
||||||
|
if len(c.dst)-c.pDst < len(s) {
|
||||||
|
c.err = transform.ErrShortDst
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// This loop is faster than using copy.
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c.dst[c.pDst] = s[i]
|
||||||
|
c.pDst++
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy writes the current rune to dst.
|
||||||
|
func (c *context) copy() bool {
|
||||||
|
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
|
||||||
|
}
|
||||||
|
|
||||||
|
// copyXOR copies the current rune to dst and modifies it by applying the XOR
// pattern of the case info. It is the responsibility of the caller to ensure
// that this is a rune with a XOR pattern defined.
//
// It returns false, leaving dst untouched, if the rune does not fit in dst.
func (c *context) copyXOR() bool {
	if !c.copy() {
		return false
	}
	if c.info&xorIndexBit == 0 {
		// Fast path for 6-bit XOR pattern, which covers most cases.
		// Only the last byte written is modified.
		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
	} else {
		// Interpret XOR bits as an index.
		// TODO: test performance for unrolling this loop. Verify that we have
		// at least two bytes and at most three.
		idx := c.info >> xorShift
		// Walk backwards through both dst and xorData, XOR-ing one byte per
		// step, until a zero entry in xorData ends the pattern.
		for p := c.pDst - 1; ; p-- {
			c.dst[p] ^= xorData[idx]
			idx--
			if xorData[idx] == 0 {
				break
			}
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// hasPrefix returns true if src[pSrc:] starts with the given string.
|
||||||
|
func (c *context) hasPrefix(s string) bool {
|
||||||
|
b := c.src[c.pSrc:]
|
||||||
|
if len(b) < len(s) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, c := range b[:len(s)] {
|
||||||
|
if c != s[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// caseType returns an info with only the case bits, normalized to either
// cLower, cUpper, cTitle or cUncased.
func (c *context) caseType() info {
	// The case bits are the low three bits of the info value.
	cm := c.info & 0x7
	if cm < 4 {
		// Values below 4 are returned unchanged.
		return cm
	}
	if cm >= cXORCase {
		// xor the last bit of the rune with the case type bits.
		b := c.src[c.pSrc+c.sz-1]
		// & binds tighter than ^, so this is info(b&1) ^ (cm & 0x3).
		return info(b&1) ^ cm&0x3
	}
	if cm == cIgnorableCased {
		return cLower
	}
	return cUncased
}
|
||||||
|
|
||||||
|
// lower writes the lowercase version of the current rune to dst. It returns
// false if the write failed (see writeBytes and writeString).
func lower(c *context) bool {
	ct := c.caseType()
	// No mapping at all, or already lowercase: copy verbatim.
	if c.info&hasMappingMask == 0 || ct == cLower {
		return c.copy()
	}
	// Regular mapping: apply the XOR pattern stored in the info value.
	if c.info&exceptionBit == 0 {
		return c.copyXOR()
	}
	// Exceptional mapping: look up the entry in the exceptions table.
	e := exceptions[c.info>>exceptionShift:]
	offset := 2 + e[0]&lengthMask // size of header + fold string
	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
		return c.writeString(e[offset : offset+nLower])
	}
	return c.copy()
}
|
||||||
|
|
||||||
|
// isLower reports whether the current rune is left unchanged by lower. If it
// is not, isLower records transform.ErrEndOfSpan in c.err and returns false.
func isLower(c *context) bool {
	ct := c.caseType()
	if c.info&hasMappingMask == 0 || ct == cLower {
		return true
	}
	if c.info&exceptionBit == 0 {
		// lower would apply an XOR pattern here.
		c.err = transform.ErrEndOfSpan
		return false
	}
	e := exceptions[c.info>>exceptionShift:]
	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
		// lower would write a replacement string here.
		c.err = transform.ErrEndOfSpan
		return false
	}
	return true
}
|
||||||
|
|
||||||
|
// upper writes the uppercase version of the current rune to dst. It returns
// false if the write failed (see writeBytes and writeString).
func upper(c *context) bool {
	ct := c.caseType()
	// No mapping at all, or already uppercase: copy verbatim.
	if c.info&hasMappingMask == 0 || ct == cUpper {
		return c.copy()
	}
	// Regular mapping: apply the XOR pattern stored in the info value.
	if c.info&exceptionBit == 0 {
		return c.copyXOR()
	}
	// Exceptional mapping: look up the entry in the exceptions table.
	e := exceptions[c.info>>exceptionShift:]
	offset := 2 + e[0]&lengthMask // size of header + fold string
	// Get length of first special case mapping.
	n := (e[1] >> lengthBits) & lengthMask
	if ct == cTitle {
		// The first special case mapping is for lower. Set n to the second.
		if n == noChange {
			n = 0
		}
		// Skip past the first mapping (n bytes) and take the second length.
		n, e = e[1]&lengthMask, e[n:]
	}
	if n != noChange {
		return c.writeString(e[offset : offset+n])
	}
	return c.copy()
}
|
||||||
|
|
||||||
|
// isUpper reports whether the current rune is considered to be in upper case
// already. If it is not, isUpper records transform.ErrEndOfSpan in c.err and
// returns false. Nothing is written to dst.
func isUpper(c *context) bool {
	ct := c.caseType()
	if c.info&hasMappingMask == 0 || ct == cUpper {
		return true
	}
	if c.info&exceptionBit == 0 {
		// upper would apply an XOR pattern here.
		c.err = transform.ErrEndOfSpan
		return false
	}
	e := exceptions[c.info>>exceptionShift:]
	// Get length of first special case mapping.
	n := (e[1] >> lengthBits) & lengthMask
	if ct == cTitle {
		// For title-case runes the second length field is the relevant one.
		n = e[1] & lengthMask
	}
	if n != noChange {
		c.err = transform.ErrEndOfSpan
		return false
	}
	return true
}
|
||||||
|
|
||||||
|
// title writes the title case version of the current rune to dst. It returns
// false if the write failed (see writeBytes and writeString).
func title(c *context) bool {
	ct := c.caseType()
	// No mapping at all, or already title case: copy verbatim.
	if c.info&hasMappingMask == 0 || ct == cTitle {
		return c.copy()
	}
	if c.info&exceptionBit == 0 {
		// Regular mapping: only lowercase runes are changed.
		if ct == cLower {
			return c.copyXOR()
		}
		return c.copy()
	}
	// Get the exception data.
	e := exceptions[c.info>>exceptionShift:]
	offset := 2 + e[0]&lengthMask // size of header + fold string

	nFirst := (e[1] >> lengthBits) & lengthMask
	if nTitle := e[1] & lengthMask; nTitle != noChange {
		// A dedicated title mapping exists; it follows the first mapping, if
		// there is one.
		if nFirst != noChange {
			e = e[nFirst:]
		}
		return c.writeString(e[offset : offset+nTitle])
	}
	if ct == cLower && nFirst != noChange {
		// Use the uppercase version instead.
		return c.writeString(e[offset : offset+nFirst])
	}
	// Already in correct case.
	return c.copy()
}
|
||||||
|
|
||||||
|
// isTitle reports whether the current rune is in title case, i.e. whether
// title would leave it unchanged. If it is not, isTitle records
// transform.ErrEndOfSpan in c.err and returns false.
func isTitle(c *context) bool {
	ct := c.caseType()
	if c.info&hasMappingMask == 0 || ct == cTitle {
		return true
	}
	if c.info&exceptionBit == 0 {
		if ct == cLower {
			// title would apply an XOR pattern here.
			c.err = transform.ErrEndOfSpan
			return false
		}
		return true
	}
	// Get the exception data.
	e := exceptions[c.info>>exceptionShift:]
	if nTitle := e[1] & lengthMask; nTitle != noChange {
		// title would write the dedicated title mapping.
		c.err = transform.ErrEndOfSpan
		return false
	}
	nFirst := (e[1] >> lengthBits) & lengthMask
	if ct == cLower && nFirst != noChange {
		// title would write the uppercase mapping.
		c.err = transform.ErrEndOfSpan
		return false
	}
	return true
}
|
||||||
|
|
||||||
|
// foldFull writes the full case folding of the current rune to dst. It returns
// false if the write failed (see writeBytes and writeString).
func foldFull(c *context) bool {
	// Runes without any mapping fold to themselves.
	if c.info&hasMappingMask == 0 {
		return c.copy()
	}
	ct := c.caseType()
	if c.info&exceptionBit == 0 {
		// Regular mapping: non-lowercase runes, and lowercase runes flagged
		// with inverseFoldBit, are folded by applying the XOR pattern.
		if ct != cLower || c.info&inverseFoldBit != 0 {
			return c.copyXOR()
		}
		return c.copy()
	}
	// Exceptional mapping: look up the entry in the exceptions table.
	e := exceptions[c.info>>exceptionShift:]
	n := e[0] & lengthMask
	if n == 0 {
		// No explicit fold string is stored.
		if ct == cLower {
			return c.copy()
		}
		// Fall back to the first special case mapping.
		n = (e[1] >> lengthBits) & lengthMask
	}
	return c.writeString(e[2 : 2+n])
}
|
||||||
|
|
||||||
|
// isFoldFull reports whether the current rune is already in its fully folded
// form, i.e. whether foldFull would copy it unchanged. If it is not,
// isFoldFull records transform.ErrEndOfSpan in c.err and returns false.
func isFoldFull(c *context) bool {
	if c.info&hasMappingMask == 0 {
		return true
	}
	ct := c.caseType()
	if c.info&exceptionBit == 0 {
		if ct != cLower || c.info&inverseFoldBit != 0 {
			// foldFull would apply an XOR pattern here.
			c.err = transform.ErrEndOfSpan
			return false
		}
		return true
	}
	e := exceptions[c.info>>exceptionShift:]
	n := e[0] & lengthMask
	if n == 0 && ct == cLower {
		return true
	}
	c.err = transform.ErrEndOfSpan
	return false
}
|
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
Normal file
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
import "golang.org/x/text/transform"
|
||||||
|
|
||||||
|
// caseFolder is the transformer behind Fold. It embeds transform.NopResetter
// and has no fields of its own.
type caseFolder struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
// Transform implements the Transformer interface for doing case folding: it
// writes the full case folding of src to dst.
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	c := context{dst: dst, src: src, atEOF: atEOF}
	for c.next() {
		// The result of foldFull is not checked here: a failed write records
		// c.err, which turns checkpoint into a no-op and makes next return
		// false.
		foldFull(&c)
		c.checkpoint()
	}
	return c.ret()
}
|
||||||
|
|
||||||
|
// Span implements transform.SpanningTransformer. It advances over src for as
// long as isFoldFull accepts each rune, checkpointing after every accepted
// rune, and returns the position and error computed by retSpan.
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
	c := context{src: src, atEOF: atEOF}
	for c.next() && isFoldFull(&c) {
		c.checkpoint()
	}
	return c.retSpan()
}
|
||||||
|
|
||||||
|
func makeFold(o options) transform.SpanningTransformer {
|
||||||
|
// TODO: Special case folding, through option Language, Special/Turkic, or
|
||||||
|
// both.
|
||||||
|
// TODO: Implement Compact options.
|
||||||
|
return &caseFolder{}
|
||||||
|
}
|
61
vendor/golang.org/x/text/cases/icu.go
generated
vendored
Normal file
61
vendor/golang.org/x/text/cases/icu.go
generated
vendored
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build icu
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
// Ideally these functions would be defined in a test file, but go test doesn't
|
||||||
|
// allow CGO in tests. The build tag should ensure either way that these
|
||||||
|
// functions will not end up in the package.
|
||||||
|
|
||||||
|
// TODO: Ensure that the correct ICU version is set.
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo LDFLAGS: -licui18n.57 -licuuc.57
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unicode/ustring.h>
|
||||||
|
#include <unicode/utypes.h>
|
||||||
|
#include <unicode/localpointer.h>
|
||||||
|
#include <unicode/ucasemap.h>
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
|
func doICU(tag, caser, input string) string {
|
||||||
|
err := C.UErrorCode(0)
|
||||||
|
loc := C.CString(tag)
|
||||||
|
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
|
||||||
|
|
||||||
|
buf := make([]byte, len(input)*4)
|
||||||
|
dst := (*C.char)(unsafe.Pointer(&buf[0]))
|
||||||
|
src := C.CString(input)
|
||||||
|
|
||||||
|
cn := C.int32_t(0)
|
||||||
|
|
||||||
|
switch caser {
|
||||||
|
case "fold":
|
||||||
|
cn = C.ucasemap_utf8FoldCase(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
case "lower":
|
||||||
|
cn = C.ucasemap_utf8ToLower(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
case "upper":
|
||||||
|
cn = C.ucasemap_utf8ToUpper(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
case "title":
|
||||||
|
cn = C.ucasemap_utf8ToTitle(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
}
|
||||||
|
return string(buf[:cn])
|
||||||
|
}
|
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
func (c info) cccVal() info {
|
||||||
|
if c&exceptionBit != 0 {
|
||||||
|
return info(exceptions[c>>exceptionShift]) & cccMask
|
||||||
|
}
|
||||||
|
return c & cccMask
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c info) cccType() info {
|
||||||
|
ccc := c.cccVal()
|
||||||
|
if ccc <= cccZero {
|
||||||
|
return cccZero
|
||||||
|
}
|
||||||
|
return ccc
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Implement full Unicode breaking algorithm:
|
||||||
|
// 1) Implement breaking in separate package.
|
||||||
|
// 2) Use the breaker here.
|
||||||
|
// 3) Compare table size and performance of using the more generic breaker.
|
||||||
|
//
|
||||||
|
// Note that we can extend the current algorithm to be much more accurate. This
|
||||||
|
// only makes sense, though, if the performance and/or space penalty of using
|
||||||
|
// the generic breaker is big. Extra data will only be needed for non-cased
|
||||||
|
// runes, which means there are sufficient bits left in the caseType.
|
||||||
|
// ICU prohibits breaking in such cases as well.
|
||||||
|
|
||||||
|
// For the purpose of title casing we use an approximation of the Unicode Word
|
||||||
|
// Breaking algorithm defined in Annex #29:
|
||||||
|
// https://www.unicode.org/reports/tr29/#Default_Word_Boundaries.
|
||||||
|
//
|
||||||
|
// For our approximation, we group the Word Break types into the following
|
||||||
|
// categories, with associated rules:
|
||||||
|
//
|
||||||
|
// 1) Letter:
|
||||||
|
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
|
||||||
|
// Rule: Never break between consecutive runes of this category.
|
||||||
|
//
|
||||||
|
// 2) Mid:
|
||||||
|
// MidLetter, MidNumLet, Single_Quote.
|
||||||
|
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
|
||||||
|
// Me, Cf, Lm or Sk).
|
||||||
|
// Rule: Don't break between Letter and Mid, but break between two Mids.
|
||||||
|
//
|
||||||
|
// 3) Break:
|
||||||
|
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
|
||||||
|
// Other.
|
||||||
|
// These categories should always result in a break between two cased letters.
|
||||||
|
// Rule: Always break.
|
||||||
|
//
|
||||||
|
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
|
||||||
|
// preventing a break between two cased letters. For now we will ignore this
|
||||||
|
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
|
||||||
|
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
|
||||||
|
//
|
||||||
|
// Note 2: the rule for Mid is very approximate, but works in most cases. To
|
||||||
|
// improve, we could store the categories in the trie value and use a FA to
|
||||||
|
// manage breaks. See TODO comment above.
|
||||||
|
//
|
||||||
|
// Note 3: according to the spec, it is possible for the Extend category to
|
||||||
|
// introduce breaks between other categories grouped in Letter. However, this
|
||||||
|
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
|
||||||
|
|
||||||
|
// isBreak returns whether this rune should introduce a break.
|
||||||
|
func (c info) isBreak() bool {
|
||||||
|
return c.cccVal() == cccBreak
|
||||||
|
}
|
||||||
|
|
||||||
|
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
|
||||||
|
// Numeric, ExtendNumLet, or Extend.
|
||||||
|
func (c info) isLetter() bool {
|
||||||
|
ccc := c.cccVal()
|
||||||
|
if ccc == cccZero {
|
||||||
|
return !c.isCaseIgnorable()
|
||||||
|
}
|
||||||
|
return ccc != cccBreak
|
||||||
|
}
|
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
Normal file
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
Normal file
@ -0,0 +1,816 @@
|
|||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
// This file contains the definitions of case mappings for all supported
|
||||||
|
// languages. The rules for the language-specific tailorings were taken and
|
||||||
|
// modified from the CLDR transform definitions in common/transforms.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal"
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A mapFunc takes a context set to the current rune and writes the mapped
|
||||||
|
// version to the same context. It may advance the context to the next rune. It
|
||||||
|
// returns whether a checkpoint is possible: whether the pDst bytes written to
|
||||||
|
// dst so far won't need changing as we see more source bytes.
|
||||||
|
type mapFunc func(*context) bool
|
||||||
|
|
||||||
|
// A spanFunc takes a context set to the current rune and returns whether this
|
||||||
|
// rune would be altered when written to the output. It may advance the context
|
||||||
|
// to the next rune. It returns whether a checkpoint is possible.
|
||||||
|
type spanFunc func(*context) bool
|
||||||
|
|
||||||
|
// maxIgnorable defines the maximum number of ignorables to consider for
|
||||||
|
// lookahead operations.
|
||||||
|
const maxIgnorable = 30
|
||||||
|
|
||||||
|
// supported lists the language tags for which we have tailorings.
|
||||||
|
const supported = "und af az el lt nl tr"
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
tags := []language.Tag{}
|
||||||
|
for _, s := range strings.Split(supported, " ") {
|
||||||
|
tags = append(tags, language.MustParse(s))
|
||||||
|
}
|
||||||
|
matcher = internal.NewInheritanceMatcher(tags)
|
||||||
|
Supported = language.NewCoverage(tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
matcher *internal.InheritanceMatcher
|
||||||
|
|
||||||
|
Supported language.Coverage
|
||||||
|
|
||||||
|
// We keep the following lists separate, instead of having a single per-
|
||||||
|
// language struct, to give the compiler a chance to remove unused code.
|
||||||
|
|
||||||
|
// Some uppercase mappers are stateless, so we can precompute the
|
||||||
|
// Transformers and save a bit on runtime allocations.
|
||||||
|
upperFunc = []struct {
|
||||||
|
upper mapFunc
|
||||||
|
span spanFunc
|
||||||
|
}{
|
||||||
|
{nil, nil}, // und
|
||||||
|
{nil, nil}, // af
|
||||||
|
{aztrUpper(upper), isUpper}, // az
|
||||||
|
{elUpper, noSpan}, // el
|
||||||
|
{ltUpper(upper), noSpan}, // lt
|
||||||
|
{nil, nil}, // nl
|
||||||
|
{aztrUpper(upper), isUpper}, // tr
|
||||||
|
}
|
||||||
|
|
||||||
|
undUpper transform.SpanningTransformer = &undUpperCaser{}
|
||||||
|
undLower transform.SpanningTransformer = &undLowerCaser{}
|
||||||
|
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
|
||||||
|
|
||||||
|
lowerFunc = []mapFunc{
|
||||||
|
nil, // und
|
||||||
|
nil, // af
|
||||||
|
aztrLower, // az
|
||||||
|
nil, // el
|
||||||
|
ltLower, // lt
|
||||||
|
nil, // nl
|
||||||
|
aztrLower, // tr
|
||||||
|
}
|
||||||
|
|
||||||
|
titleInfos = []struct {
|
||||||
|
title mapFunc
|
||||||
|
lower mapFunc
|
||||||
|
titleSpan spanFunc
|
||||||
|
rewrite func(*context)
|
||||||
|
}{
|
||||||
|
{title, lower, isTitle, nil}, // und
|
||||||
|
{title, lower, isTitle, afnlRewrite}, // af
|
||||||
|
{aztrUpper(title), aztrLower, isTitle, nil}, // az
|
||||||
|
{title, lower, isTitle, nil}, // el
|
||||||
|
{ltUpper(title), ltLower, noSpan, nil}, // lt
|
||||||
|
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
|
||||||
|
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
|
||||||
|
_, i, _ := matcher.Match(t)
|
||||||
|
f := upperFunc[i].upper
|
||||||
|
if f == nil {
|
||||||
|
return undUpper
|
||||||
|
}
|
||||||
|
return &simpleCaser{f: f, span: upperFunc[i].span}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
|
||||||
|
_, i, _ := matcher.Match(t)
|
||||||
|
f := lowerFunc[i]
|
||||||
|
if f == nil {
|
||||||
|
if o.ignoreFinalSigma {
|
||||||
|
return undLowerIgnoreSigma
|
||||||
|
}
|
||||||
|
return undLower
|
||||||
|
}
|
||||||
|
if o.ignoreFinalSigma {
|
||||||
|
return &simpleCaser{f: f, span: isLower}
|
||||||
|
}
|
||||||
|
return &lowerCaser{
|
||||||
|
first: f,
|
||||||
|
midWord: finalSigma(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
|
||||||
|
_, i, _ := matcher.Match(t)
|
||||||
|
x := &titleInfos[i]
|
||||||
|
lower := x.lower
|
||||||
|
if o.noLower {
|
||||||
|
lower = (*context).copy
|
||||||
|
} else if !o.ignoreFinalSigma {
|
||||||
|
lower = finalSigma(lower)
|
||||||
|
}
|
||||||
|
return &titleCaser{
|
||||||
|
title: x.title,
|
||||||
|
lower: lower,
|
||||||
|
titleSpan: x.titleSpan,
|
||||||
|
rewrite: x.rewrite,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func noSpan(c *context) bool {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: consider a similar special case for the fast majority lower case. This
|
||||||
|
// is a bit more involved so will require some more precise benchmarking to
|
||||||
|
// justify it.
|
||||||
|
|
||||||
|
type undUpperCaser struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
// undUpperCaser implements the Transformer interface for doing an upper case
|
||||||
|
// mapping for the root locale (und). It eliminates the need for an allocation
|
||||||
|
// as it prevents escaping by not using function pointers.
|
||||||
|
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() {
|
||||||
|
upper(&c)
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isUpper(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
|
||||||
|
// a lower case mapping for the root locale (und) ignoring final sigma
|
||||||
|
// handling. This casing algorithm is used in some performance-critical packages
|
||||||
|
// like secure/precis and x/net/http/idna, which warrants its special-casing.
|
||||||
|
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() && lower(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span implements a generic lower-casing. This is possible as isLower works
|
||||||
|
// for all lowercasing variants. All lowercase variants only vary in how they
|
||||||
|
// transform a non-lowercase letter. They will never change an already lowercase
|
||||||
|
// letter. In addition, there is no state.
|
||||||
|
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isLower(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
type simpleCaser struct {
|
||||||
|
context
|
||||||
|
f mapFunc
|
||||||
|
span spanFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
// simpleCaser implements the Transformer interface for doing a case operation
|
||||||
|
// on a rune-by-rune basis.
|
||||||
|
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() && t.f(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && t.span(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// undLowerCaser implements the Transformer interface for doing a lower case
|
||||||
|
// mapping for the root locale (und) ignoring final sigma handling. This casing
|
||||||
|
// algorithm is used in some performance-critical packages like secure/precis
|
||||||
|
// and x/net/http/idna, which warrants its special-casing.
|
||||||
|
type undLowerCaser struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
|
||||||
|
for isInterWord := true; c.next(); {
|
||||||
|
if isInterWord {
|
||||||
|
if c.info.isCased() {
|
||||||
|
if !lower(&c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = false
|
||||||
|
} else if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||||
|
if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = true
|
||||||
|
} else if !c.hasPrefix("Σ") {
|
||||||
|
if !lower(&c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else if !finalSigmaBody(&c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isLower(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// lowerCaser implements the Transformer interface. The default Unicode lower
|
||||||
|
// casing requires different treatment for the first and subsequent characters
|
||||||
|
// of a word, most notably to handle the Greek final Sigma.
|
||||||
|
type lowerCaser struct {
|
||||||
|
undLowerIgnoreSigmaCaser
|
||||||
|
|
||||||
|
context
|
||||||
|
|
||||||
|
first, midWord mapFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
t.context = context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
c := &t.context
|
||||||
|
|
||||||
|
for isInterWord := true; c.next(); {
|
||||||
|
if isInterWord {
|
||||||
|
if c.info.isCased() {
|
||||||
|
if !t.first(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = false
|
||||||
|
} else if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||||
|
if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = true
|
||||||
|
} else if !t.midWord(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
// titleCaser implements the Transformer interface. Title casing algorithms
|
||||||
|
// distinguish between the first letter of a word and subsequent letters of the
|
||||||
|
// same word. It uses state to avoid requiring a potentially infinite lookahead.
|
||||||
|
type titleCaser struct {
|
||||||
|
context
|
||||||
|
|
||||||
|
// rune mappings used by the actual casing algorithms.
|
||||||
|
title mapFunc
|
||||||
|
lower mapFunc
|
||||||
|
titleSpan spanFunc
|
||||||
|
|
||||||
|
rewrite func(*context)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transform implements the standard Unicode title case algorithm as defined in
|
||||||
|
// Chapter 3 of The Unicode Standard:
|
||||||
|
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
|
||||||
|
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
|
||||||
|
// first cased character F following the word boundary. If F exists, map F to
|
||||||
|
// Titlecase_Mapping(F); then map all characters C between F and the following
|
||||||
|
// word boundary to Lowercase_Mapping(C).
|
||||||
|
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||||
|
c := &t.context
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
p := c.info
|
||||||
|
if t.rewrite != nil {
|
||||||
|
t.rewrite(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
wasMid := p.isMid()
|
||||||
|
// Break out of this loop on failure to ensure we do not modify the
|
||||||
|
// state incorrectly.
|
||||||
|
if p.isCased() {
|
||||||
|
if !c.isMidWord {
|
||||||
|
if !t.title(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.isMidWord = true
|
||||||
|
} else if !t.lower(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else if !c.copy() {
|
||||||
|
break
|
||||||
|
} else if p.isBreak() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
|
||||||
|
// As we save the state of the transformer, it is safe to call
|
||||||
|
// checkpoint after any successful write.
|
||||||
|
if !(c.isMidWord && wasMid) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if wasMid && c.info.isMid() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||||
|
c := &t.context
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
p := c.info
|
||||||
|
if t.rewrite != nil {
|
||||||
|
t.rewrite(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
wasMid := p.isMid()
|
||||||
|
// Break out of this loop on failure to ensure we do not modify the
|
||||||
|
// state incorrectly.
|
||||||
|
if p.isCased() {
|
||||||
|
if !c.isMidWord {
|
||||||
|
if !t.titleSpan(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.isMidWord = true
|
||||||
|
} else if !isLower(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else if p.isBreak() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
// As we save the state of the transformer, it is safe to call
|
||||||
|
// checkpoint after any successful write.
|
||||||
|
if !(c.isMidWord && wasMid) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if wasMid && c.info.isMid() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// finalSigma adds Greek final Sigma handing to another casing function. It
|
||||||
|
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
|
||||||
|
// case-ignorables and a cased letters.
|
||||||
|
func finalSigma(f mapFunc) mapFunc {
|
||||||
|
return func(c *context) bool {
|
||||||
|
if !c.hasPrefix("Σ") {
|
||||||
|
return f(c)
|
||||||
|
}
|
||||||
|
return finalSigmaBody(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func finalSigmaBody(c *context) bool {
|
||||||
|
// Current rune must be ∑.
|
||||||
|
|
||||||
|
// ::NFD();
|
||||||
|
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
||||||
|
// Σ } [:case-ignorable:]* [:cased:] → σ;
|
||||||
|
// [:cased:] [:case-ignorable:]* { Σ → ς;
|
||||||
|
// ::Any-Lower;
|
||||||
|
// ::NFC();
|
||||||
|
|
||||||
|
p := c.pDst
|
||||||
|
c.writeString("ς")
|
||||||
|
|
||||||
|
// TODO: we should do this here, but right now this will never have an
|
||||||
|
// effect as this is called when the prefix is Sigma, whereas Dutch and
|
||||||
|
// Afrikaans only test for an apostrophe.
|
||||||
|
//
|
||||||
|
// if t.rewrite != nil {
|
||||||
|
// t.rewrite(c)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// We need to do one more iteration after maxIgnorable, as a cased
|
||||||
|
// letter is not an ignorable and may modify the result.
|
||||||
|
wasMid := false
|
||||||
|
for i := 0; i < maxIgnorable+1; i++ {
|
||||||
|
if !c.next() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !c.info.isCaseIgnorable() {
|
||||||
|
// All Midword runes are also case ignorable, so we are
|
||||||
|
// guaranteed to have a letter or word break here. As we are
|
||||||
|
// unreading the run, there is no need to unset c.isMidWord;
|
||||||
|
// the title caser will handle this.
|
||||||
|
if c.info.isCased() {
|
||||||
|
// p+1 is guaranteed to be in bounds: if writing ς was
|
||||||
|
// successful, p+1 will contain the second byte of ς. If not,
|
||||||
|
// this function will have returned after c.next returned false.
|
||||||
|
c.dst[p+1]++ // ς → σ
|
||||||
|
}
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// A case ignorable may also introduce a word break, so we may need
|
||||||
|
// to continue searching even after detecting a break.
|
||||||
|
isMid := c.info.isMid()
|
||||||
|
if (wasMid && isMid) || c.info.isBreak() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
wasMid = isMid
|
||||||
|
c.copy()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// finalSigmaSpan would be the same as isLower.
|
||||||
|
|
||||||
|
// elUpper implements Greek upper casing, which entails removing a predefined
|
||||||
|
// set of non-blocked modifiers. Note that these accents should not be removed
|
||||||
|
// for title casing!
|
||||||
|
// Example: "Οδός" -> "ΟΔΟΣ".
|
||||||
|
func elUpper(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
|
||||||
|
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
|
||||||
|
|
||||||
|
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||||
|
oldPDst := c.pDst
|
||||||
|
if !upper(c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !unicode.Is(unicode.Greek, r) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
i := 0
|
||||||
|
// Take the properties of the uppercased rune that is already written to the
|
||||||
|
// destination. This saves us the trouble of having to uppercase the
|
||||||
|
// decomposed rune again.
|
||||||
|
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
|
||||||
|
// Restore the destination position and process the decomposed rune.
|
||||||
|
r, sz := utf8.DecodeRune(b)
|
||||||
|
if r <= 0xFF { // See A.6.1
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
c.pDst = oldPDst
|
||||||
|
// Insert the first rune and ignore the modifiers. See A.6.2.
|
||||||
|
c.writeBytes(b[:sz])
|
||||||
|
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
|
||||||
|
}
|
||||||
|
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
|
||||||
|
// Above and Iota Subscript
|
||||||
|
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
|
||||||
|
0x0301, // U+0301 COMBINING ACUTE ACCENT
|
||||||
|
0x0304, // U+0304 COMBINING MACRON
|
||||||
|
0x0306, // U+0306 COMBINING BREVE
|
||||||
|
0x0308, // U+0308 COMBINING DIAERESIS
|
||||||
|
0x0313, // U+0313 COMBINING COMMA ABOVE
|
||||||
|
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
|
||||||
|
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
|
||||||
|
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
|
||||||
|
// No-op. Gobble the modifier.
|
||||||
|
|
||||||
|
default:
|
||||||
|
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
|
||||||
|
// We don't need to test for IotaSubscript as the only rune that
|
||||||
|
// qualifies (U+0345) was already excluded in the switch statement
|
||||||
|
// above. See A.4.
|
||||||
|
|
||||||
|
case cccAbove:
|
||||||
|
return c.copy()
|
||||||
|
default:
|
||||||
|
// Some other modifier. We're still allowed to gobble Greek
|
||||||
|
// modifiers after this.
|
||||||
|
c.copy()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == maxIgnorable
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
|
||||||
|
|
||||||
|
func ltLower(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// # Introduce an explicit dot above when lowercasing capital I's and J's
|
||||||
|
// # whenever there are more accents above.
|
||||||
|
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||||||
|
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
|
||||||
|
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
|
||||||
|
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||||
|
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
// ::NFD();
|
||||||
|
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
|
||||||
|
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
|
||||||
|
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
|
||||||
|
// I \u0300 (Ì) → i \u0307 \u0300;
|
||||||
|
// I \u0301 (Í) → i \u0307 \u0301;
|
||||||
|
// I \u0303 (Ĩ) → i \u0307 \u0303;
|
||||||
|
// ::Any-Lower();
|
||||||
|
// ::NFC();
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
|
||||||
|
lower(c)
|
||||||
|
if r != 'I' && r != 'J' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p := norm.NFD.Properties(c.src[c.pSrc:])
|
||||||
|
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
|
||||||
|
// UTF-8 optimization: the decomposition will only have an above
|
||||||
|
// modifier if the last rune of the decomposition is in [U+300-U+311].
|
||||||
|
// In all other cases, a decomposition starting with I is always
|
||||||
|
// an I followed by modifiers that are not cased themselves. See A.2.
|
||||||
|
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
|
||||||
|
if !c.writeBytes(d[:1]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
c.dst[c.pDst-1] += 'a' - 'A' // lower
|
||||||
|
|
||||||
|
// Assumption: modifier never changes on lowercase. See A.1.
|
||||||
|
// Assumption: all modifiers added have CCC = Above. See A.2.3.
|
||||||
|
return c.writeString("\u0307") && c.writeBytes(d[1:])
|
||||||
|
}
|
||||||
|
// In all other cases the additional modifiers will have a CCC
|
||||||
|
// that is less than 230 (Above). We will insert the U+0307, if
|
||||||
|
// needed, after these modifiers so that a string in FCD form
|
||||||
|
// will remain so. See A.2.2.
|
||||||
|
lower(c)
|
||||||
|
i = 1
|
||||||
|
} else {
|
||||||
|
return lower(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch c.info.cccType() {
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
case cccAbove:
|
||||||
|
return c.writeString("\u0307") && c.copy() // See A.1.
|
||||||
|
default:
|
||||||
|
c.copy() // See A.1.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == maxIgnorable
|
||||||
|
}
|
||||||
|
|
||||||
|
// ltLowerSpan would be the same as isLower.
|
||||||
|
|
||||||
|
func ltUpper(f mapFunc) mapFunc {
|
||||||
|
return func(c *context) bool {
|
||||||
|
// Unicode:
|
||||||
|
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||||
|
//
|
||||||
|
// From CLDR:
|
||||||
|
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
|
||||||
|
// # intervening non-230 marks.
|
||||||
|
// ::NFD();
|
||||||
|
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
|
||||||
|
// ::Any-Upper();
|
||||||
|
// ::NFC();
|
||||||
|
|
||||||
|
// TODO: See A.5. A soft-dotted rune never has an exception. This would
|
||||||
|
// allow us to overload the exception bit and encode this property in
|
||||||
|
// info. Need to measure performance impact of this.
|
||||||
|
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||||
|
oldPDst := c.pDst
|
||||||
|
if !f(c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !unicode.Is(unicode.Soft_Dotted, r) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// We don't need to do an NFD normalization, as a soft-dotted rune never
|
||||||
|
// contains U+0307. See A.3.
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch c.info.cccType() {
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
case cccAbove:
|
||||||
|
if c.hasPrefix("\u0307") {
|
||||||
|
// We don't do a full NFC, but rather combine runes for
|
||||||
|
// some of the common cases. (Returning NFC or
|
||||||
|
// preserving normal form is neither a requirement nor
|
||||||
|
// a possibility anyway).
|
||||||
|
if !c.next() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
|
||||||
|
s := ""
|
||||||
|
switch c.src[c.pSrc+1] {
|
||||||
|
case 0x80: // U+0300 COMBINING GRAVE ACCENT
|
||||||
|
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
case 0x81: // U+0301 COMBINING ACUTE ACCENT
|
||||||
|
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
case 0x83: // U+0303 COMBINING TILDE
|
||||||
|
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
case 0x88: // U+0308 COMBINING DIAERESIS
|
||||||
|
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
if s != "" {
|
||||||
|
c.pDst = oldPDst
|
||||||
|
return c.writeString(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c.copy()
|
||||||
|
default:
|
||||||
|
c.copy()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == maxIgnorable
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
|
||||||
|
|
||||||
|
func aztrUpper(f mapFunc) mapFunc {
|
||||||
|
return func(c *context) bool {
|
||||||
|
// i→İ;
|
||||||
|
if c.src[c.pSrc] == 'i' {
|
||||||
|
return c.writeString("İ")
|
||||||
|
}
|
||||||
|
return f(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func aztrLower(c *context) (done bool) {
|
||||||
|
// From CLDR:
|
||||||
|
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||||||
|
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
// İ→i;
|
||||||
|
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||||
|
// # This matches the behavior of the canonically equivalent I-dot_above
|
||||||
|
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
|
||||||
|
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||||
|
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
|
||||||
|
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
|
||||||
|
// I→ı ;
|
||||||
|
// ::Any-Lower();
|
||||||
|
if c.hasPrefix("\u0130") { // İ
|
||||||
|
return c.writeString("i")
|
||||||
|
}
|
||||||
|
if c.src[c.pSrc] != 'I' {
|
||||||
|
return lower(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We ignore the lower-case I for now, but insert it later when we know
|
||||||
|
// which form we need.
|
||||||
|
start := c.pSrc + c.sz
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
Loop:
|
||||||
|
// We check for up to n ignorables before \u0307. As \u0307 is an
|
||||||
|
// ignorable as well, n is maxIgnorable-1.
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch c.info.cccType() {
|
||||||
|
case cccAbove:
|
||||||
|
if c.hasPrefix("\u0307") {
|
||||||
|
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
|
||||||
|
}
|
||||||
|
done = true
|
||||||
|
break Loop
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
done = true
|
||||||
|
break Loop
|
||||||
|
default:
|
||||||
|
// We'll write this rune after we know which starter to use.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i == maxIgnorable {
|
||||||
|
done = true
|
||||||
|
}
|
||||||
|
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
|
||||||
|
}
|
||||||
|
|
||||||
|
// aztrLowerSpan would be the same as isLower.
|
||||||
|
|
||||||
|
func nlTitle(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// # Special titlecasing for Dutch initial "ij".
|
||||||
|
// ::Any-Title();
|
||||||
|
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||||
|
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||||
|
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
|
||||||
|
return title(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !c.writeString("I") || !c.next() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
|
||||||
|
return c.writeString("J")
|
||||||
|
}
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func nlTitleSpan(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// # Special titlecasing for Dutch initial "ij".
|
||||||
|
// ::Any-Title();
|
||||||
|
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||||
|
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||||
|
if c.src[c.pSrc] != 'I' {
|
||||||
|
return isTitle(c)
|
||||||
|
}
|
||||||
|
if !c.next() || c.src[c.pSrc] == 'j' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.src[c.pSrc] != 'J' {
|
||||||
|
c.unreadRune()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
|
||||||
|
func afnlRewrite(c *context) {
|
||||||
|
if c.hasPrefix("'") || c.hasPrefix("’") {
|
||||||
|
c.isMidWord = true
|
||||||
|
}
|
||||||
|
}
|
2255
vendor/golang.org/x/text/cases/tables10.0.0.go
generated
vendored
Normal file
2255
vendor/golang.org/x/text/cases/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2316
vendor/golang.org/x/text/cases/tables11.0.0.go
generated
vendored
Normal file
2316
vendor/golang.org/x/text/cases/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2359
vendor/golang.org/x/text/cases/tables12.0.0.go
generated
vendored
Normal file
2359
vendor/golang.org/x/text/cases/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2399
vendor/golang.org/x/text/cases/tables13.0.0.go
generated
vendored
Normal file
2399
vendor/golang.org/x/text/cases/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2527
vendor/golang.org/x/text/cases/tables15.0.0.go
generated
vendored
Normal file
2527
vendor/golang.org/x/text/cases/tables15.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2215
vendor/golang.org/x/text/cases/tables9.0.0.go
generated
vendored
Normal file
2215
vendor/golang.org/x/text/cases/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
217
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
Normal file
217
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
// This file contains definitions for interpreting the trie value of the case
|
||||||
|
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||||
|
// program and the resultant package. Sharing is achieved by the generator
|
||||||
|
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||||
|
|
||||||
|
// info holds case information for a single rune. It is the value returned
|
||||||
|
// by a trie lookup. Most mapping information can be stored in a single 16-bit
|
||||||
|
// value. If not, for example when a rune is mapped to multiple runes, the value
|
||||||
|
// stores some basic case data and an index into an array with additional data.
|
||||||
|
//
|
||||||
|
// The per-rune values have the following format:
|
||||||
|
//
|
||||||
|
// if (exception) {
|
||||||
|
// 15..4 unsigned exception index
|
||||||
|
// } else {
|
||||||
|
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||||
|
// Only 13..8 are used for XOR patterns.
|
||||||
|
// 7 inverseFold (fold to upper, not to lower)
|
||||||
|
// 6 index: interpret the XOR pattern as an index
|
||||||
|
// or isMid if case mode is cIgnorableUncased.
|
||||||
|
// 5..4 CCC: zero (normal or break), above or other
|
||||||
|
// }
|
||||||
|
// 3 exception: interpret this value as an exception index
|
||||||
|
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||||
|
// 2..0 case mode
|
||||||
|
//
|
||||||
|
// For the non-exceptional cases, a rune must be either uncased, lowercase or
|
||||||
|
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
|
||||||
|
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
|
||||||
|
// least-significant bits of the rune).
|
||||||
|
//
|
||||||
|
// See the definitions below for a more detailed description of the various
|
||||||
|
// bits.
|
||||||
|
// info is the 16-bit per-rune case value returned by a trie lookup. The
// constants and predicate methods below decode its bit fields.
type info uint16

const (
	// Masks and values over the case mode bits (2..0).
	casedMask      = 0x0003
	fullCasedMask  = 0x0007
	ignorableMask  = 0x0006
	ignorableValue = 0x0004

	inverseFoldBit = 0x0080 // bit 7
	isMidBit       = 0x0040 // bit 6, when the case mode is cIgnorableUncased

	exceptionBit     = 0x0008 // bit 3
	exceptionShift   = 4
	numExceptionBits = 12

	xorIndexBit = 0x0040 // bit 6, for non-exception values
	xorShift    = 8

	// There is no mapping if all xor bits and the exception bit are zero.
	hasMappingMask = 0xff80 | exceptionBit
)

// The case mode bits encode the case type of a rune: uncased, title, upper
// or lower case, and case ignorable. (For a definition of these terms see
// Chapter 3 of The Unicode Standard Core Specification.) In some rare cases a
// rune can be both cased and case-ignorable; this is encoded by
// cIgnorableCased, and a rune of this type is always lower case. Some runes
// are cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00, among others, and some Cyrillic
// characters). The cXORCase mode exploits this: the case mode (always upper
// or lower case) is derived from the rune value. Because the XOR pattern for
// case mappings is often identical for successive runes, cXORCase can yield
// long runs of identical trie values, which lets the trie blocks compress
// better.
const (
	cUncased          info = iota // 000
	cTitle                        // 001
	cLower                        // 010
	cUpper                        // 011
	cIgnorableUncased             // 100
	cIgnorableCased               // 101 // lower case if mappings exist
	cXORCase                      // 11x // case is cLower | ((rune&1) ^ x)

	maxCaseMode = cUpper
)

// isCased reports whether either of the two low case mode bits is set.
func (c info) isCased() bool {
	mode := c & casedMask
	return mode != 0
}

// isCaseIgnorable reports whether the bits selected by ignorableMask equal
// ignorableValue, which holds for cIgnorableUncased and cIgnorableCased.
func (c info) isCaseIgnorable() bool {
	mode := c & ignorableMask
	return mode == ignorableValue
}

// isNotCasedAndNotCaseIgnorable reports whether all three case mode bits are
// zero, i.e. the case mode is cUncased.
func (c info) isNotCasedAndNotCaseIgnorable() bool {
	mode := c & fullCasedMask
	return mode == 0
}

// isCaseIgnorableAndNotCased reports whether the case mode is exactly
// cIgnorableUncased.
func (c info) isCaseIgnorableAndNotCased() bool {
	mode := c & fullCasedMask
	return mode == cIgnorableUncased
}

// isMid reports whether the case mode is cIgnorableUncased and isMidBit is
// set. All other bits are ignored.
func (c info) isMid() bool {
	const (
		relevant info = fullCasedMask | isMidBit
		want          = isMidBit | cIgnorableUncased
	)
	return c&relevant == want
}
|
||||||
|
|
||||||
|
// The case mapping implementation will need to know about various Canonical
|
||||||
|
// Combining Class (CCC) values. We encode two of these in the trie value:
|
||||||
|
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
|
||||||
|
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
|
||||||
|
// the rune also has the break category Break (see below).
|
||||||
|
const (
|
||||||
|
cccBreak info = iota << 4
|
||||||
|
cccZero
|
||||||
|
cccAbove
|
||||||
|
cccOther
|
||||||
|
|
||||||
|
cccMask = cccBreak | cccZero | cccAbove | cccOther
|
||||||
|
)
|
||||||
|
|
||||||
|
// NOTE(review): these look like raw Canonical Combining Class numbers
// (compare cccZero = 0 and cccAbove = 230) — confirm against their users.

// starter is the value 0.
const starter = 0

// above is the value 230.
const above = 230

// iotaSubscript is the value 240; presumably the combining class of the
// Greek iota subscript — TODO confirm.
const iotaSubscript = 240
|
||||||
|
|
||||||
|
// The exceptions slice holds data that does not fit in a normal info entry.
|
||||||
|
// The entry is pointed to by the exception index in an entry. It has the
|
||||||
|
// following format:
|
||||||
|
//
|
||||||
|
// Header:
|
||||||
|
//
|
||||||
|
// byte 0:
|
||||||
|
// 7..6 unused
|
||||||
|
// 5..4 CCC type (same bits as entry)
|
||||||
|
// 3 unused
|
||||||
|
// 2..0 length of fold
|
||||||
|
//
|
||||||
|
// byte 1:
|
||||||
|
// 7..6 unused
|
||||||
|
// 5..3 length of 1st mapping of case type
|
||||||
|
// 2..0 length of 2nd mapping of case type
|
||||||
|
//
|
||||||
|
// case 1st 2nd
|
||||||
|
// lower -> upper, title
|
||||||
|
// upper -> lower, title
|
||||||
|
// title -> lower, upper
|
||||||
|
//
|
||||||
|
// Lengths with the value 0x7 indicate no value and imply no change.
|
||||||
|
// A length of 0 indicates a mapping to zero-length string.
|
||||||
|
//
|
||||||
|
// Body bytes:
|
||||||
|
//
|
||||||
|
// case folding bytes
|
||||||
|
// lowercase mapping bytes
|
||||||
|
// uppercase mapping bytes
|
||||||
|
// titlecase mapping bytes
|
||||||
|
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||||
|
//
|
||||||
|
// Fallbacks:
|
||||||
|
//
|
||||||
|
// missing fold -> lower
|
||||||
|
// missing title -> upper
|
||||||
|
// all missing -> original rune
|
||||||
|
//
|
||||||
|
// exceptions starts with a dummy byte to enforce that there is no zero index
|
||||||
|
// value.
|
||||||
|
// Constants for decoding the length fields of an exceptions entry header.
const (
	// lengthBits is the width of one length field.
	lengthBits = 3
	// lengthMask selects a single length field (0x07).
	lengthMask = 1<<lengthBits - 1
	// NOTE(review): the format description says a length of 0x7 means
	// "no change", while this constant is 0 — confirm against its users.
	noChange = 0
)
|
||||||
|
|
||||||
|
// References to generated trie.
|
||||||
|
|
||||||
|
// trie is the package-level case trie, obtained from newCaseTrie(0).
// newCaseTrie is defined outside this section of the file.
var trie = newCaseTrie(0)
|
||||||
|
|
||||||
|
var sparse = sparseBlocks{
|
||||||
|
values: sparseValues[:],
|
||||||
|
offsets: sparseOffsets[:],
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sparse block lookup code.
|
||||||
|
|
||||||
|
// valueRange is an entry in a sparse block: value applies to every byte in
// the inclusive range [lo, hi].
type valueRange struct {
	value  uint16
	lo, hi byte
}

// sparseBlocks holds all sparse blocks back to back in values. Block n
// occupies values[offsets[n]:offsets[n+1]].
type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}

// lookup returns the value from values block n for byte b using binary
// search. It returns 0 when no range in the block contains b.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
	first, limit := s.offsets[n], s.offsets[n+1]
	for first < limit {
		mid := first + (limit-first)/2
		entry := s.values[mid]
		switch {
		case b < entry.lo:
			// b can only be in an earlier range.
			limit = mid
		case b > entry.hi:
			// b can only be in a later range.
			first = mid + 1
		default:
			return entry.value
		}
	}
	return 0
}
|
||||||
|
|
||||||
|
// lastRuneForTesting is the last rune used for testing. Everything after
// this is boring.
const lastRuneForTesting rune = 0x1FFFF
|
1
vendor/modules.txt
vendored
1
vendor/modules.txt
vendored
@ -955,6 +955,7 @@ golang.org/x/sys/windows/svc/mgr
|
|||||||
golang.org/x/term
|
golang.org/x/term
|
||||||
# golang.org/x/text v0.14.0
|
# golang.org/x/text v0.14.0
|
||||||
## explicit; go 1.18
|
## explicit; go 1.18
|
||||||
|
golang.org/x/text/cases
|
||||||
golang.org/x/text/encoding
|
golang.org/x/text/encoding
|
||||||
golang.org/x/text/encoding/charmap
|
golang.org/x/text/encoding/charmap
|
||||||
golang.org/x/text/encoding/htmlindex
|
golang.org/x/text/encoding/htmlindex
|
||||||
|
Loading…
Reference in New Issue
Block a user