luet/vendor/github.com/klauspost/compress/huff0/decompress.go
Itxaka 4adc0dc9b9
Use goreleaser to build and release (#244)
Instead of using gox on one side and an action to release, we can merge
them together with goreleaser which will build for extra targets (arm,
mips if needed in the future) and it also takes care of creating
checksums, a source archive, and a changelog and creating a release with
all the artifacts.

All binaries should respect the old naming convention, so any scripts
out there should still work.

Signed-off-by: Itxaka <igarcia@suse.com>
2021-08-11 08:30:55 +02:00

1165 lines
30 KiB
Go

package huff0
import (
"errors"
"fmt"
"io"
"github.com/klauspost/compress/fse"
)
type dTable struct {
single []dEntrySingle
double []dEntryDouble
}
// single-symbols decoding
type dEntrySingle struct {
entry uint16
}
// double-symbols decoding
type dEntryDouble struct {
seq uint16
nBits uint8
len uint8
}
// Uses special code for all tables that are < 8 bits.
const use8BitTables = true
// ReadTable will read a table from the input.
// The size of the input may be larger than the table definition.
// Any content remaining after the table definition will be returned.
// If no Scratch is provided a new one is allocated.
// The returned Scratch can be used for encoding or decoding input using this table.
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
s, err = s.prepare(in)
if err != nil {
return s, nil, err
}
if len(in) <= 1 {
return s, nil, errors.New("input too small for table")
}
iSize := in[0]
in = in[1:]
if iSize >= 128 {
// Uncompressed
oSize := iSize - 127
iSize = (oSize + 1) / 2
if int(iSize) > len(in) {
return s, nil, errors.New("input too small for table")
}
for n := uint8(0); n < oSize; n += 2 {
v := in[n/2]
s.huffWeight[n] = v >> 4
s.huffWeight[n+1] = v & 15
}
s.symbolLen = uint16(oSize)
in = in[iSize:]
} else {
if len(in) < int(iSize) {
return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
}
// FSE compressed weights
s.fse.DecompressLimit = 255
hw := s.huffWeight[:]
s.fse.Out = hw
b, err := fse.Decompress(in[:iSize], s.fse)
s.fse.Out = nil
if err != nil {
return s, nil, err
}
if len(b) > 255 {
return s, nil, errors.New("corrupt input: output table too large")
}
s.symbolLen = uint16(len(b))
in = in[iSize:]
}
// collect weight stats
var rankStats [16]uint32
weightTotal := uint32(0)
for _, v := range s.huffWeight[:s.symbolLen] {
if v > tableLogMax {
return s, nil, errors.New("corrupt input: weight too large")
}
v2 := v & 15
rankStats[v2]++
// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
weightTotal += (1 << v2) >> 1
}
if weightTotal == 0 {
return s, nil, errors.New("corrupt input: weights zero")
}
// get last non-null symbol weight (implied, total must be 2^n)
{
tableLog := highBit32(weightTotal) + 1
if tableLog > tableLogMax {
return s, nil, errors.New("corrupt input: tableLog too big")
}
s.actualTableLog = uint8(tableLog)
// determine last weight
{
total := uint32(1) << tableLog
rest := total - weightTotal
verif := uint32(1) << highBit32(rest)
lastWeight := highBit32(rest) + 1
if verif != rest {
// last value must be a clean power of 2
return s, nil, errors.New("corrupt input: last value not power of two")
}
s.huffWeight[s.symbolLen] = uint8(lastWeight)
s.symbolLen++
rankStats[lastWeight]++
}
}
if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
// by construction : at least 2 elts of rank 1, must be even
return s, nil, errors.New("corrupt input: min elt size, even check failed ")
}
// TODO: Choose between single/double symbol decoding
// Calculate starting value for each rank
{
var nextRankStart uint32
for n := uint8(1); n < s.actualTableLog+1; n++ {
current := nextRankStart
nextRankStart += rankStats[n] << (n - 1)
rankStats[n] = current
}
}
// fill DTable (always full size)
tSize := 1 << tableLogMax
if len(s.dt.single) != tSize {
s.dt.single = make([]dEntrySingle, tSize)
}
cTable := s.prevTable
if cap(cTable) < maxSymbolValue+1 {
cTable = make([]cTableEntry, 0, maxSymbolValue+1)
}
cTable = cTable[:maxSymbolValue+1]
s.prevTable = cTable[:s.symbolLen]
s.prevTableLog = s.actualTableLog
for n, w := range s.huffWeight[:s.symbolLen] {
if w == 0 {
cTable[n] = cTableEntry{
val: 0,
nBits: 0,
}
continue
}
length := (uint32(1) << w) >> 1
d := dEntrySingle{
entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
}
rank := &rankStats[w]
cTable[n] = cTableEntry{
val: uint16(*rank >> (w - 1)),
nBits: uint8(d.entry),
}
single := s.dt.single[*rank : *rank+length]
for i := range single {
single[i] = d
}
*rank += length
}
return s, in, nil
}
// Decompress1X will decompress a 1X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// Before this is called, the table must be initialized with ReadTable unless
// the encoder re-used the table.
// deprecated: Use the stateless Decoder() to get a concurrent version.
func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
if cap(s.Out) < s.MaxDecodedSize {
s.Out = make([]byte, s.MaxDecodedSize)
}
s.Out = s.Out[:0:s.MaxDecodedSize]
s.Out, err = s.Decoder().Decompress1X(s.Out, in)
return s.Out, err
}
// Decompress4X will decompress a 4X encoded stream.
// Before this is called, the table must be initialized with ReadTable unless
// the encoder re-used the table.
// The length of the supplied input must match the end of a block exactly.
// The destination size of the uncompressed data must be known and provided.
// deprecated: Use the stateless Decoder() to get a concurrent version.
func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
if dstSize > s.MaxDecodedSize {
return nil, ErrMaxDecodedSizeExceeded
}
if cap(s.Out) < dstSize {
s.Out = make([]byte, s.MaxDecodedSize)
}
s.Out = s.Out[:0:dstSize]
s.Out, err = s.Decoder().Decompress4X(s.Out, in)
return s.Out, err
}
// Decoder will return a stateless decoder that can be used by multiple
// decompressors concurrently.
// Before this is called, the table must be initialized with ReadTable.
// The Decoder is still linked to the scratch buffer so that cannot be reused.
// However, it is safe to discard the scratch.
func (s *Scratch) Decoder() *Decoder {
return &Decoder{
dt: s.dt,
actualTableLog: s.actualTableLog,
}
}
// Decoder provides stateless decoding.
type Decoder struct {
dt dTable
actualTableLog uint8
}
// Decompress1X will decompress a 1X encoded stream.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded")
}
if use8BitTables && d.actualTableLog <= 8 {
return d.decompress1X8Bit(dst, src)
}
var br bitReaderShifted
err := br.init(src)
if err != nil {
return dst, err
}
maxDecodedSize := cap(dst)
dst = dst[:0]
// Avoid bounds check by always having full sized table.
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
dt := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
for br.off >= 8 {
br.fillFast()
v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
// Refill
br.fillFast()
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
if len(dst)+int(off) > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:off]...)
// br < 8, so uint8 is fine
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
for bitsLeft > 0 {
br.fill()
if false && br.bitsRead >= 32 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value = (br.value << 32) | uint64(low)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value = (br.value << 8) | uint64(br.in[br.off-1])
br.bitsRead -= 8
br.off--
}
}
}
if len(dst) >= maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= nBits
dst = append(dst, uint8(v.entry>>8))
}
return dst, br.close()
}
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
if d.actualTableLog == 8 {
return d.decompress1X8BitExactly(dst, src)
}
var br bitReaderBytes
err := br.init(src)
if err != nil {
return dst, err
}
maxDecodedSize := cap(dst)
dst = dst[:0]
// Avoid bounds check by always having full sized table.
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
shift := (8 - d.actualTableLog) & 7
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
for br.off >= 4 {
br.fillFast()
v := dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
if len(dst)+int(off) > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:off]...)
// br < 4, so uint8 is fine
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
for bitsLeft > 0 {
if br.bitsRead >= 64-8 {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
if len(dst) >= maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
v := dt[br.peekByteFast()>>shift]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
return dst, br.close()
}
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
var br bitReaderBytes
err := br.init(src)
if err != nil {
return dst, err
}
maxDecodedSize := cap(dst)
dst = dst[:0]
// Avoid bounds check by always having full sized table.
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
const shift = 0
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
for br.off >= 4 {
br.fillFast()
v := dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
if len(dst)+int(off) > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:off]...)
// br < 4, so uint8 is fine
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
for bitsLeft > 0 {
if br.bitsRead >= 64-8 {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
if len(dst) >= maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
v := dt[br.peekByteFast()>>shift]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
return dst, br.close()
}
// Decompress4X will decompress a 4X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded")
}
if len(src) < 6+(4*1) {
return nil, errors.New("input too small")
}
if use8BitTables && d.actualTableLog <= 8 {
return d.decompress4X8bit(dst, src)
}
var br [4]bitReaderShifted
start := 6
for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
}
err := br[i].init(src[start : start+length])
if err != nil {
return nil, err
}
start += length
}
err := br[3].init(src[start:])
if err != nil {
return nil, err
}
// destination, offset to match first output
dstSize := cap(dst)
dst = dst[:dstSize]
out := dst
dstEvery := (dstSize + 3) / 4
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
var decoded int
// Decode 2 values from each decoder/loop.
const bufoff = 256 / 4
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
}
{
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
val2 := br[stream2].peekBitsFast(d.actualTableLog)
v2 := single[val2&tlMask]
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v2 = single[val2&tlMask]
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
val2 := br[stream2].peekBitsFast(d.actualTableLog)
v2 := single[val2&tlMask]
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v2 = single[val2&tlMask]
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
off += 2
if off == bufoff {
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[:bufoff])
copy(out[dstEvery:], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
off = 0
out = out[bufoff:]
decoded += 256
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
copy(out, buf[:off])
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
bitsLeft := br.off*8 + uint(64-br.bitsRead)
for bitsLeft > 0 {
br.fill()
if false && br.bitsRead >= 32 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value = (br.value << 32) | uint64(low)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value = (br.value << 8) | uint64(br.in[br.off-1])
br.bitsRead -= 8
br.off--
}
}
}
// end inline...
if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
val := br.peekBitsFast(d.actualTableLog)
v := single[val&tlMask].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= uint(nBits)
out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
return nil, err
}
}
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
return dst, nil
}
// Decompress4X will decompress a 4X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
if d.actualTableLog == 8 {
return d.decompress4X8bitExactly(dst, src)
}
var br [4]bitReaderBytes
start := 6
for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
}
err := br[i].init(src[start : start+length])
if err != nil {
return nil, err
}
start += length
}
err := br[3].init(src[start:])
if err != nil {
return nil, err
}
// destination, offset to match first output
dstSize := cap(dst)
dst = dst[:dstSize]
out := dst
dstEvery := (dstSize + 3) / 4
shift := (8 - d.actualTableLog) & 7
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
const bufoff = 256 / 4
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
}
{
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
off += 4
if off == bufoff {
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[:bufoff])
copy(out[dstEvery:], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
off = 0
out = out[bufoff:]
decoded += 256
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
copy(out, buf[:off])
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
for bitsLeft > 0 {
if br.finished() {
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value |= uint64(low) << (br.bitsRead - 32)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
}
// end inline...
if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
return nil, err
}
}
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
return dst, nil
}
// Decompress4X will decompress a 4X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
var br [4]bitReaderBytes
start := 6
for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
}
err := br[i].init(src[start : start+length])
if err != nil {
return nil, err
}
start += length
}
err := br[3].init(src[start:])
if err != nil {
return nil, err
}
// destination, offset to match first output
dstSize := cap(dst)
dst = dst[:dstSize]
out := dst
dstEvery := (dstSize + 3) / 4
const shift = 0
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
const bufoff = 256 / 4
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
}
{
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
off += 4
if off == bufoff {
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[:bufoff])
copy(out[dstEvery:], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
off = 0
out = out[bufoff:]
decoded += 256
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
copy(out, buf[:off])
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
for bitsLeft > 0 {
if br.finished() {
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value |= uint64(low) << (br.bitsRead - 32)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
}
// end inline...
if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
return nil, err
}
}
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
return dst, nil
}
// matches will compare a decoding table to a coding table.
// Errors are written to the writer.
// Nothing will be written if table is ok.
func (s *Scratch) matches(ct cTable, w io.Writer) {
if s == nil || len(s.dt.single) == 0 {
return
}
dt := s.dt.single[:1<<s.actualTableLog]
tablelog := s.actualTableLog
ok := 0
broken := 0
for sym, enc := range ct {
errs := 0
broken++
if enc.nBits == 0 {
for _, dec := range dt {
if uint8(dec.entry>>8) == byte(sym) {
fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
errs++
break
}
}
if errs == 0 {
broken--
}
continue
}
// Unused bits in input
ub := tablelog - enc.nBits
top := enc.val << ub
// decoder looks at top bits.
dec := dt[top]
if uint8(dec.entry) != enc.nBits {
fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
errs++
}
if uint8(dec.entry>>8) != uint8(sym) {
fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
errs++
}
if errs > 0 {
fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
continue
}
// Ensure that all combinations are covered.
for i := uint16(0); i < (1 << ub); i++ {
vval := top | i
dec := dt[vval]
if uint8(dec.entry) != enc.nBits {
fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
errs++
}
if uint8(dec.entry>>8) != uint8(sym) {
fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
errs++
}
if errs > 20 {
fmt.Fprintf(w, "%d errros, stopping\n", errs)
break
}
}
if errs == 0 {
ok++
broken--
}
}
if broken > 0 {
fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
}
}