diff --git a/vendor/golang.org/x/text/unicode/norm/composition.go b/vendor/golang.org/x/text/unicode/norm/composition.go index d17b278a..bab4c5de 100644 --- a/vendor/golang.org/x/text/unicode/norm/composition.go +++ b/vendor/golang.org/x/text/unicode/norm/composition.go @@ -33,17 +33,9 @@ const ( // streamSafe implements the policy of when a CGJ should be inserted. type streamSafe uint8 -// mkStreamSafe is a shorthand for declaring a streamSafe var and calling -// first on it. -func mkStreamSafe(p Properties) streamSafe { - return streamSafe(p.nTrailingNonStarters()) -} - -// first inserts the first rune of a segment. +// first inserts the first rune of a segment. It is a faster version of next if +// it is known p represents the first rune in a segment. func (ss *streamSafe) first(p Properties) { - if *ss != 0 { - panic("!= 0") - } *ss = streamSafe(p.nTrailingNonStarters()) } @@ -66,7 +58,7 @@ func (ss *streamSafe) next(p Properties) ssState { // be a non-starter. Note that it always hold that if nLead > 0 then // nLead == nTrail. if n == 0 { - *ss = 0 + *ss = streamSafe(p.nTrailingNonStarters()) return ssStarter } return ssSuccess @@ -142,7 +134,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) { func (rb *reorderBuffer) reset() { rb.nrune = 0 rb.nbyte = 0 - rb.ss = 0 } func (rb *reorderBuffer) doFlush() bool { @@ -257,6 +248,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) { // It flushes the buffer on each new segment start. func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr { rb.tmpBytes.setBytes(dcomp) + // As the streamSafe accounting already handles the counting for modifiers, + // we don't have to call next. However, we do need to keep the accounting + // intact when flushing the buffer. for i := 0; i < len(dcomp); { info := rb.f.info(rb.tmpBytes, i) if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() { diff --git a/vendor/golang.org/x/text/unicode/norm/iter.go b/vendor/golang.org/x/text/unicode/norm/iter.go index 0a42a72d..ce17f96c 100644 --- a/vendor/golang.org/x/text/unicode/norm/iter.go +++ b/vendor/golang.org/x/text/unicode/norm/iter.go @@ -41,6 +41,7 @@ func (i *Iter) Init(f Form, src []byte) { i.next = i.rb.f.nextMain i.asciiF = nextASCIIBytes i.info = i.rb.f.info(i.rb.src, i.p) + i.rb.ss.first(i.info) } // InitString initializes i to iterate over src after normalizing it to Form f. @@ -56,11 +57,12 @@ func (i *Iter) InitString(f Form, src string) { i.next = i.rb.f.nextMain i.asciiF = nextASCIIString i.info = i.rb.f.info(i.rb.src, i.p) + i.rb.ss.first(i.info) } // Seek sets the segment to be returned by the next call to Next to start // at position p. It is the responsibility of the caller to set p to the -// start of a UTF8 rune. +// start of a segment. func (i *Iter) Seek(offset int64, whence int) (int64, error) { var abs int64 switch whence { @@ -84,6 +86,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) { i.multiSeg = nil i.next = i.rb.f.nextMain i.info = i.rb.f.info(i.rb.src, i.p) + i.rb.ss.first(i.info) return abs, nil } @@ -161,6 +164,7 @@ func nextHangul(i *Iter) []byte { if next >= i.rb.nsrc { i.setDone() } else if i.rb.src.hangul(next) == 0 { + i.rb.ss.next(i.info) i.info = i.rb.f.info(i.rb.src, i.p) i.next = i.rb.f.nextMain return i.next(i) @@ -204,12 +208,10 @@ func nextMultiNorm(i *Iter) []byte { if info.BoundaryBefore() { i.rb.compose() seg := i.buf[:i.rb.flushCopy(i.buf[:])] - i.rb.ss.first(info) i.rb.insertUnsafe(input{bytes: d}, j, info) i.multiSeg = d[j+int(info.size):] return seg } - i.rb.ss.next(info) i.rb.insertUnsafe(input{bytes: d}, j, info) j += int(info.size) } @@ -222,9 +224,9 @@ func nextMultiNorm(i *Iter) []byte { func nextDecomposed(i *Iter) (next []byte) { outp := 0 inCopyStart, outCopyStart := i.p, 0 - ss := mkStreamSafe(i.info) for { if sz := int(i.info.size); sz <= 1 { + i.rb.ss = 0 p := i.p i.p++ // ASCII or illegal byte. Either way, advance by 1. if i.p >= i.rb.nsrc { @@ -243,6 +245,8 @@ func nextDecomposed(i *Iter) (next []byte) { p := outp + len(d) if outp > 0 { i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) + // TODO: this condition should not be possible, but we leave it + // in for defensive purposes. if p > len(i.buf) { return i.buf[:outp] } @@ -266,7 +270,7 @@ func nextDecomposed(i *Iter) (next []byte) { } else { i.info = i.rb.f.info(i.rb.src, i.p) } - switch ss.next(i.info) { + switch i.rb.ss.next(i.info) { case ssOverflow: i.next = nextCGJDecompose fallthrough @@ -309,7 +313,7 @@ func nextDecomposed(i *Iter) (next []byte) { } prevCC := i.info.tccc i.info = i.rb.f.info(i.rb.src, i.p) - if v := ss.next(i.info); v == ssStarter { + if v := i.rb.ss.next(i.info); v == ssStarter { break } else if v == ssOverflow { i.next = nextCGJDecompose @@ -335,10 +339,6 @@ doNorm: func doNormDecomposed(i *Iter) []byte { for { - if s := i.rb.ss.next(i.info); s == ssOverflow { - i.next = nextCGJDecompose - break - } i.rb.insertUnsafe(i.rb.src, i.p, i.info) if i.p += int(i.info.size); i.p >= i.rb.nsrc { i.setDone() @@ -348,6 +348,10 @@ func doNormDecomposed(i *Iter) []byte { if i.info.ccc == 0 { break } + if s := i.rb.ss.next(i.info); s == ssOverflow { + i.next = nextCGJDecompose + break + } } // new segment or too many combining characters: exit normalization return i.buf[:i.rb.flushCopy(i.buf[:])] @@ -357,6 +361,7 @@ func nextCGJDecompose(i *Iter) []byte { i.rb.ss = 0 i.rb.insertCGJ() i.next = nextDecomposed + i.rb.ss.first(i.info) buf := doNormDecomposed(i) return buf } @@ -365,7 +370,6 @@ func nextCGJDecompose(i *Iter) []byte { func nextComposed(i *Iter) []byte { outp, startp := 0, i.p var prevCC uint8 - ss := mkStreamSafe(i.info) for { if !i.info.isYesC() { goto doNorm @@ -385,11 +389,12 @@ func nextComposed(i *Iter) []byte { i.setDone() break } else if i.rb.src._byte(i.p) < utf8.RuneSelf { + i.rb.ss = 0 i.next = i.asciiF break } i.info = i.rb.f.info(i.rb.src, i.p) - if v := ss.next(i.info); v == ssStarter { + if v := i.rb.ss.next(i.info); v == ssStarter { break } else if v == ssOverflow { i.next = nextCGJCompose @@ -401,8 +406,10 @@ func nextComposed(i *Iter) []byte { } return i.returnSlice(startp, i.p) doNorm: + // reset to start position i.p = startp i.info = i.rb.f.info(i.rb.src, i.p) + i.rb.ss.first(i.info) if i.info.multiSegment() { d := i.info.Decomposition() info := i.rb.f.info(input{bytes: d}, 0) diff --git a/vendor/golang.org/x/text/unicode/norm/normalize.go b/vendor/golang.org/x/text/unicode/norm/normalize.go index d3f20693..e28ac641 100644 --- a/vendor/golang.org/x/text/unicode/norm/normalize.go +++ b/vendor/golang.org/x/text/unicode/norm/normalize.go @@ -324,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) // have an overflow for runes that are starters (e.g. with U+FF9E). switch ss.next(info) { case ssStarter: - ss.first(info) lastSegStart = i case ssOverflow: return lastSegStart, false @@ -441,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { } return -1 } + // TODO: Using streamSafe to determine the boundary isn't the same as + // using BoundaryBefore. Determine which should be used. if s := ss.next(info); s != ssSuccess { return i } @@ -505,15 +506,14 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int { if info.size == 0 { return 0 } - if rb.nrune > 0 { - if s := rb.ss.next(info); s == ssStarter { - goto end - } else if s == ssOverflow { - rb.insertCGJ() + if s := rb.ss.next(info); s == ssStarter { + // TODO: this could be removed if we don't support merging. + if rb.nrune > 0 { goto end } - } else { - rb.ss.first(info) + } else if s == ssOverflow { + rb.insertCGJ() + goto end } if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { return int(err)