mirror of
https://github.com/mudler/luet.git
synced 2025-07-14 07:34:08 +00:00
Instead of using gox on one side and an action to release, we can merge them together with goreleaser which will build for extra targets (arm, mips if needed in the future) and it also takes care of creating checksums, a source archive, and a changelog and creating a release with all the artifacts. All binaries should respect the old naming convention, so any scripts out there should still work. Signed-off-by: Itxaka <igarcia@suse.com>
1165 lines
30 KiB
Go
1165 lines
30 KiB
Go
package huff0
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
|
|
"github.com/klauspost/compress/fse"
|
|
)
|
|
|
|
type dTable struct {
|
|
single []dEntrySingle
|
|
double []dEntryDouble
|
|
}
|
|
|
|
// dEntrySingle is one slot of the single-symbol decoding table.
//
// The low byte of entry is the number of bits the matched code consumes,
// the high byte is the decoded symbol.
type dEntrySingle struct {
	entry uint16
}
|
|
|
|
// dEntryDouble is one slot of the double-symbol decoding table.
//
// NOTE(review): no visible code reads or writes these fields yet; the
// per-field meanings below are inferred from the names and should be
// confirmed once double-symbol decoding is implemented.
type dEntryDouble struct {
	seq   uint16 // presumably the decoded symbol pair
	nBits uint8  // presumably the number of bits consumed
	len   uint8  // presumably the number of symbols in seq
}
|
|
|
|
// use8BitTables enables the specialised byte-oriented decoders, which the
// Decompress1X/Decompress4X entry points select when the table log is 8
// bits or fewer.
const use8BitTables = true
|
|
|
|
// ReadTable will read a table from the input.
|
|
// The size of the input may be larger than the table definition.
|
|
// Any content remaining after the table definition will be returned.
|
|
// If no Scratch is provided a new one is allocated.
|
|
// The returned Scratch can be used for encoding or decoding input using this table.
|
|
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
|
|
s, err = s.prepare(in)
|
|
if err != nil {
|
|
return s, nil, err
|
|
}
|
|
if len(in) <= 1 {
|
|
return s, nil, errors.New("input too small for table")
|
|
}
|
|
iSize := in[0]
|
|
in = in[1:]
|
|
if iSize >= 128 {
|
|
// Uncompressed
|
|
oSize := iSize - 127
|
|
iSize = (oSize + 1) / 2
|
|
if int(iSize) > len(in) {
|
|
return s, nil, errors.New("input too small for table")
|
|
}
|
|
for n := uint8(0); n < oSize; n += 2 {
|
|
v := in[n/2]
|
|
s.huffWeight[n] = v >> 4
|
|
s.huffWeight[n+1] = v & 15
|
|
}
|
|
s.symbolLen = uint16(oSize)
|
|
in = in[iSize:]
|
|
} else {
|
|
if len(in) < int(iSize) {
|
|
return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
|
|
}
|
|
// FSE compressed weights
|
|
s.fse.DecompressLimit = 255
|
|
hw := s.huffWeight[:]
|
|
s.fse.Out = hw
|
|
b, err := fse.Decompress(in[:iSize], s.fse)
|
|
s.fse.Out = nil
|
|
if err != nil {
|
|
return s, nil, err
|
|
}
|
|
if len(b) > 255 {
|
|
return s, nil, errors.New("corrupt input: output table too large")
|
|
}
|
|
s.symbolLen = uint16(len(b))
|
|
in = in[iSize:]
|
|
}
|
|
|
|
// collect weight stats
|
|
var rankStats [16]uint32
|
|
weightTotal := uint32(0)
|
|
for _, v := range s.huffWeight[:s.symbolLen] {
|
|
if v > tableLogMax {
|
|
return s, nil, errors.New("corrupt input: weight too large")
|
|
}
|
|
v2 := v & 15
|
|
rankStats[v2]++
|
|
// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
|
|
weightTotal += (1 << v2) >> 1
|
|
}
|
|
if weightTotal == 0 {
|
|
return s, nil, errors.New("corrupt input: weights zero")
|
|
}
|
|
|
|
// get last non-null symbol weight (implied, total must be 2^n)
|
|
{
|
|
tableLog := highBit32(weightTotal) + 1
|
|
if tableLog > tableLogMax {
|
|
return s, nil, errors.New("corrupt input: tableLog too big")
|
|
}
|
|
s.actualTableLog = uint8(tableLog)
|
|
// determine last weight
|
|
{
|
|
total := uint32(1) << tableLog
|
|
rest := total - weightTotal
|
|
verif := uint32(1) << highBit32(rest)
|
|
lastWeight := highBit32(rest) + 1
|
|
if verif != rest {
|
|
// last value must be a clean power of 2
|
|
return s, nil, errors.New("corrupt input: last value not power of two")
|
|
}
|
|
s.huffWeight[s.symbolLen] = uint8(lastWeight)
|
|
s.symbolLen++
|
|
rankStats[lastWeight]++
|
|
}
|
|
}
|
|
|
|
if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
|
|
// by construction : at least 2 elts of rank 1, must be even
|
|
return s, nil, errors.New("corrupt input: min elt size, even check failed ")
|
|
}
|
|
|
|
// TODO: Choose between single/double symbol decoding
|
|
|
|
// Calculate starting value for each rank
|
|
{
|
|
var nextRankStart uint32
|
|
for n := uint8(1); n < s.actualTableLog+1; n++ {
|
|
current := nextRankStart
|
|
nextRankStart += rankStats[n] << (n - 1)
|
|
rankStats[n] = current
|
|
}
|
|
}
|
|
|
|
// fill DTable (always full size)
|
|
tSize := 1 << tableLogMax
|
|
if len(s.dt.single) != tSize {
|
|
s.dt.single = make([]dEntrySingle, tSize)
|
|
}
|
|
cTable := s.prevTable
|
|
if cap(cTable) < maxSymbolValue+1 {
|
|
cTable = make([]cTableEntry, 0, maxSymbolValue+1)
|
|
}
|
|
cTable = cTable[:maxSymbolValue+1]
|
|
s.prevTable = cTable[:s.symbolLen]
|
|
s.prevTableLog = s.actualTableLog
|
|
|
|
for n, w := range s.huffWeight[:s.symbolLen] {
|
|
if w == 0 {
|
|
cTable[n] = cTableEntry{
|
|
val: 0,
|
|
nBits: 0,
|
|
}
|
|
continue
|
|
}
|
|
length := (uint32(1) << w) >> 1
|
|
d := dEntrySingle{
|
|
entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
|
|
}
|
|
|
|
rank := &rankStats[w]
|
|
cTable[n] = cTableEntry{
|
|
val: uint16(*rank >> (w - 1)),
|
|
nBits: uint8(d.entry),
|
|
}
|
|
|
|
single := s.dt.single[*rank : *rank+length]
|
|
for i := range single {
|
|
single[i] = d
|
|
}
|
|
*rank += length
|
|
}
|
|
|
|
return s, in, nil
|
|
}
|
|
|
|
// Decompress1X will decompress a 1X encoded stream.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
// Before this is called, the table must be initialized with ReadTable unless
|
|
// the encoder re-used the table.
|
|
// deprecated: Use the stateless Decoder() to get a concurrent version.
|
|
func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
|
|
if cap(s.Out) < s.MaxDecodedSize {
|
|
s.Out = make([]byte, s.MaxDecodedSize)
|
|
}
|
|
s.Out = s.Out[:0:s.MaxDecodedSize]
|
|
s.Out, err = s.Decoder().Decompress1X(s.Out, in)
|
|
return s.Out, err
|
|
}
|
|
|
|
// Decompress4X will decompress a 4X encoded stream.
|
|
// Before this is called, the table must be initialized with ReadTable unless
|
|
// the encoder re-used the table.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
// The destination size of the uncompressed data must be known and provided.
|
|
// deprecated: Use the stateless Decoder() to get a concurrent version.
|
|
func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
|
|
if dstSize > s.MaxDecodedSize {
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
if cap(s.Out) < dstSize {
|
|
s.Out = make([]byte, s.MaxDecodedSize)
|
|
}
|
|
s.Out = s.Out[:0:dstSize]
|
|
s.Out, err = s.Decoder().Decompress4X(s.Out, in)
|
|
return s.Out, err
|
|
}
|
|
|
|
// Decoder will return a stateless decoder that can be used by multiple
|
|
// decompressors concurrently.
|
|
// Before this is called, the table must be initialized with ReadTable.
|
|
// The Decoder is still linked to the scratch buffer so that cannot be reused.
|
|
// However, it is safe to discard the scratch.
|
|
func (s *Scratch) Decoder() *Decoder {
|
|
return &Decoder{
|
|
dt: s.dt,
|
|
actualTableLog: s.actualTableLog,
|
|
}
|
|
}
|
|
|
|
// Decoder provides stateless decoding.
|
|
type Decoder struct {
|
|
dt dTable
|
|
actualTableLog uint8
|
|
}
|
|
|
|
// Decompress1X will decompress a 1X encoded stream.
|
|
// The cap of the output buffer will be the maximum decompressed size.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
|
|
if len(d.dt.single) == 0 {
|
|
return nil, errors.New("no table loaded")
|
|
}
|
|
if use8BitTables && d.actualTableLog <= 8 {
|
|
return d.decompress1X8Bit(dst, src)
|
|
}
|
|
var br bitReaderShifted
|
|
err := br.init(src)
|
|
if err != nil {
|
|
return dst, err
|
|
}
|
|
maxDecodedSize := cap(dst)
|
|
dst = dst[:0]
|
|
|
|
// Avoid bounds check by always having full sized table.
|
|
const tlSize = 1 << tableLogMax
|
|
const tlMask = tlSize - 1
|
|
dt := d.dt.single[:tlSize]
|
|
|
|
// Use temp table to avoid bound checks/append penalty.
|
|
var buf [256]byte
|
|
var off uint8
|
|
|
|
for br.off >= 8 {
|
|
br.fillFast()
|
|
v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+0] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+1] = uint8(v.entry >> 8)
|
|
|
|
// Refill
|
|
br.fillFast()
|
|
|
|
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+2] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+3] = uint8(v.entry >> 8)
|
|
|
|
off += 4
|
|
if off == 0 {
|
|
if len(dst)+256 > maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
dst = append(dst, buf[:]...)
|
|
}
|
|
}
|
|
|
|
if len(dst)+int(off) > maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
dst = append(dst, buf[:off]...)
|
|
|
|
// br < 8, so uint8 is fine
|
|
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
|
|
for bitsLeft > 0 {
|
|
br.fill()
|
|
if false && br.bitsRead >= 32 {
|
|
if br.off >= 4 {
|
|
v := br.in[br.off-4:]
|
|
v = v[:4]
|
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
br.value = (br.value << 32) | uint64(low)
|
|
br.bitsRead -= 32
|
|
br.off -= 4
|
|
} else {
|
|
for br.off > 0 {
|
|
br.value = (br.value << 8) | uint64(br.in[br.off-1])
|
|
br.bitsRead -= 8
|
|
br.off--
|
|
}
|
|
}
|
|
}
|
|
if len(dst) >= maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
nBits := uint8(v.entry)
|
|
br.advance(nBits)
|
|
bitsLeft -= nBits
|
|
dst = append(dst, uint8(v.entry>>8))
|
|
}
|
|
return dst, br.close()
|
|
}
|
|
|
|
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
|
|
// The cap of the output buffer will be the maximum decompressed size.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
|
|
if d.actualTableLog == 8 {
|
|
return d.decompress1X8BitExactly(dst, src)
|
|
}
|
|
var br bitReaderBytes
|
|
err := br.init(src)
|
|
if err != nil {
|
|
return dst, err
|
|
}
|
|
maxDecodedSize := cap(dst)
|
|
dst = dst[:0]
|
|
|
|
// Avoid bounds check by always having full sized table.
|
|
dt := d.dt.single[:256]
|
|
|
|
// Use temp table to avoid bound checks/append penalty.
|
|
var buf [256]byte
|
|
var off uint8
|
|
|
|
shift := (8 - d.actualTableLog) & 7
|
|
|
|
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
|
|
for br.off >= 4 {
|
|
br.fillFast()
|
|
v := dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+0] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+1] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+2] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+3] = uint8(v.entry >> 8)
|
|
|
|
off += 4
|
|
if off == 0 {
|
|
if len(dst)+256 > maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
dst = append(dst, buf[:]...)
|
|
}
|
|
}
|
|
|
|
if len(dst)+int(off) > maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
dst = append(dst, buf[:off]...)
|
|
|
|
// br < 4, so uint8 is fine
|
|
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
|
|
for bitsLeft > 0 {
|
|
if br.bitsRead >= 64-8 {
|
|
for br.off > 0 {
|
|
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
|
|
br.bitsRead -= 8
|
|
br.off--
|
|
}
|
|
}
|
|
if len(dst) >= maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
v := dt[br.peekByteFast()>>shift]
|
|
nBits := uint8(v.entry)
|
|
br.advance(nBits)
|
|
bitsLeft -= int8(nBits)
|
|
dst = append(dst, uint8(v.entry>>8))
|
|
}
|
|
return dst, br.close()
|
|
}
|
|
|
|
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
|
|
// The cap of the output buffer will be the maximum decompressed size.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
|
|
var br bitReaderBytes
|
|
err := br.init(src)
|
|
if err != nil {
|
|
return dst, err
|
|
}
|
|
maxDecodedSize := cap(dst)
|
|
dst = dst[:0]
|
|
|
|
// Avoid bounds check by always having full sized table.
|
|
dt := d.dt.single[:256]
|
|
|
|
// Use temp table to avoid bound checks/append penalty.
|
|
var buf [256]byte
|
|
var off uint8
|
|
|
|
const shift = 0
|
|
|
|
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
|
|
for br.off >= 4 {
|
|
br.fillFast()
|
|
v := dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+0] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+1] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+2] = uint8(v.entry >> 8)
|
|
|
|
v = dt[br.peekByteFast()>>shift]
|
|
br.advance(uint8(v.entry))
|
|
buf[off+3] = uint8(v.entry >> 8)
|
|
|
|
off += 4
|
|
if off == 0 {
|
|
if len(dst)+256 > maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
dst = append(dst, buf[:]...)
|
|
}
|
|
}
|
|
|
|
if len(dst)+int(off) > maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
dst = append(dst, buf[:off]...)
|
|
|
|
// br < 4, so uint8 is fine
|
|
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
|
|
for bitsLeft > 0 {
|
|
if br.bitsRead >= 64-8 {
|
|
for br.off > 0 {
|
|
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
|
|
br.bitsRead -= 8
|
|
br.off--
|
|
}
|
|
}
|
|
if len(dst) >= maxDecodedSize {
|
|
br.close()
|
|
return nil, ErrMaxDecodedSizeExceeded
|
|
}
|
|
v := dt[br.peekByteFast()>>shift]
|
|
nBits := uint8(v.entry)
|
|
br.advance(nBits)
|
|
bitsLeft -= int8(nBits)
|
|
dst = append(dst, uint8(v.entry>>8))
|
|
}
|
|
return dst, br.close()
|
|
}
|
|
|
|
// Decompress4X will decompress a 4X encoded stream.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
// The *capacity* of the dst slice must match the destination size of
|
|
// the uncompressed data exactly.
|
|
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
|
if len(d.dt.single) == 0 {
|
|
return nil, errors.New("no table loaded")
|
|
}
|
|
if len(src) < 6+(4*1) {
|
|
return nil, errors.New("input too small")
|
|
}
|
|
if use8BitTables && d.actualTableLog <= 8 {
|
|
return d.decompress4X8bit(dst, src)
|
|
}
|
|
|
|
var br [4]bitReaderShifted
|
|
start := 6
|
|
for i := 0; i < 3; i++ {
|
|
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
|
if start+length >= len(src) {
|
|
return nil, errors.New("truncated input (or invalid offset)")
|
|
}
|
|
err := br[i].init(src[start : start+length])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
start += length
|
|
}
|
|
err := br[3].init(src[start:])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// destination, offset to match first output
|
|
dstSize := cap(dst)
|
|
dst = dst[:dstSize]
|
|
out := dst
|
|
dstEvery := (dstSize + 3) / 4
|
|
|
|
const tlSize = 1 << tableLogMax
|
|
const tlMask = tlSize - 1
|
|
single := d.dt.single[:tlSize]
|
|
|
|
// Use temp table to avoid bound checks/append penalty.
|
|
var buf [256]byte
|
|
var off uint8
|
|
var decoded int
|
|
|
|
// Decode 2 values from each decoder/loop.
|
|
const bufoff = 256 / 4
|
|
for {
|
|
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
|
break
|
|
}
|
|
|
|
{
|
|
const stream = 0
|
|
const stream2 = 1
|
|
br[stream].fillFast()
|
|
br[stream2].fillFast()
|
|
|
|
val := br[stream].peekBitsFast(d.actualTableLog)
|
|
v := single[val&tlMask]
|
|
br[stream].advance(uint8(v.entry))
|
|
buf[off+bufoff*stream] = uint8(v.entry >> 8)
|
|
|
|
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
|
v2 := single[val2&tlMask]
|
|
br[stream2].advance(uint8(v2.entry))
|
|
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
|
|
|
|
val = br[stream].peekBitsFast(d.actualTableLog)
|
|
v = single[val&tlMask]
|
|
br[stream].advance(uint8(v.entry))
|
|
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
|
|
|
|
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
|
v2 = single[val2&tlMask]
|
|
br[stream2].advance(uint8(v2.entry))
|
|
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
|
|
}
|
|
|
|
{
|
|
const stream = 2
|
|
const stream2 = 3
|
|
br[stream].fillFast()
|
|
br[stream2].fillFast()
|
|
|
|
val := br[stream].peekBitsFast(d.actualTableLog)
|
|
v := single[val&tlMask]
|
|
br[stream].advance(uint8(v.entry))
|
|
buf[off+bufoff*stream] = uint8(v.entry >> 8)
|
|
|
|
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
|
v2 := single[val2&tlMask]
|
|
br[stream2].advance(uint8(v2.entry))
|
|
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
|
|
|
|
val = br[stream].peekBitsFast(d.actualTableLog)
|
|
v = single[val&tlMask]
|
|
br[stream].advance(uint8(v.entry))
|
|
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
|
|
|
|
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
|
v2 = single[val2&tlMask]
|
|
br[stream2].advance(uint8(v2.entry))
|
|
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
|
|
}
|
|
|
|
off += 2
|
|
|
|
if off == bufoff {
|
|
if bufoff > dstEvery {
|
|
return nil, errors.New("corruption detected: stream overrun 1")
|
|
}
|
|
copy(out, buf[:bufoff])
|
|
copy(out[dstEvery:], buf[bufoff:bufoff*2])
|
|
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
|
|
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
|
|
off = 0
|
|
out = out[bufoff:]
|
|
decoded += 256
|
|
// There must at least be 3 buffers left.
|
|
if len(out) < dstEvery*3 {
|
|
return nil, errors.New("corruption detected: stream overrun 2")
|
|
}
|
|
}
|
|
}
|
|
if off > 0 {
|
|
ioff := int(off)
|
|
if len(out) < dstEvery*3+ioff {
|
|
return nil, errors.New("corruption detected: stream overrun 3")
|
|
}
|
|
copy(out, buf[:off])
|
|
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
|
|
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
|
|
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
|
|
decoded += int(off) * 4
|
|
out = out[off:]
|
|
}
|
|
|
|
// Decode remaining.
|
|
for i := range br {
|
|
offset := dstEvery * i
|
|
br := &br[i]
|
|
bitsLeft := br.off*8 + uint(64-br.bitsRead)
|
|
for bitsLeft > 0 {
|
|
br.fill()
|
|
if false && br.bitsRead >= 32 {
|
|
if br.off >= 4 {
|
|
v := br.in[br.off-4:]
|
|
v = v[:4]
|
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
br.value = (br.value << 32) | uint64(low)
|
|
br.bitsRead -= 32
|
|
br.off -= 4
|
|
} else {
|
|
for br.off > 0 {
|
|
br.value = (br.value << 8) | uint64(br.in[br.off-1])
|
|
br.bitsRead -= 8
|
|
br.off--
|
|
}
|
|
}
|
|
}
|
|
// end inline...
|
|
if offset >= len(out) {
|
|
return nil, errors.New("corruption detected: stream overrun 4")
|
|
}
|
|
|
|
// Read value and increment offset.
|
|
val := br.peekBitsFast(d.actualTableLog)
|
|
v := single[val&tlMask].entry
|
|
nBits := uint8(v)
|
|
br.advance(nBits)
|
|
bitsLeft -= uint(nBits)
|
|
out[offset] = uint8(v >> 8)
|
|
offset++
|
|
}
|
|
decoded += offset - dstEvery*i
|
|
err = br.close()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if dstSize != decoded {
|
|
return nil, errors.New("corruption detected: short output block")
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
// Decompress4X will decompress a 4X encoded stream.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
// The *capacity* of the dst slice must match the destination size of
|
|
// the uncompressed data exactly.
|
|
func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
|
|
if d.actualTableLog == 8 {
|
|
return d.decompress4X8bitExactly(dst, src)
|
|
}
|
|
|
|
var br [4]bitReaderBytes
|
|
start := 6
|
|
for i := 0; i < 3; i++ {
|
|
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
|
if start+length >= len(src) {
|
|
return nil, errors.New("truncated input (or invalid offset)")
|
|
}
|
|
err := br[i].init(src[start : start+length])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
start += length
|
|
}
|
|
err := br[3].init(src[start:])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// destination, offset to match first output
|
|
dstSize := cap(dst)
|
|
dst = dst[:dstSize]
|
|
out := dst
|
|
dstEvery := (dstSize + 3) / 4
|
|
|
|
shift := (8 - d.actualTableLog) & 7
|
|
|
|
const tlSize = 1 << 8
|
|
const tlMask = tlSize - 1
|
|
single := d.dt.single[:tlSize]
|
|
|
|
// Use temp table to avoid bound checks/append penalty.
|
|
var buf [256]byte
|
|
var off uint8
|
|
var decoded int
|
|
|
|
// Decode 4 values from each decoder/loop.
|
|
const bufoff = 256 / 4
|
|
for {
|
|
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
|
break
|
|
}
|
|
|
|
{
|
|
// Interleave 2 decodes.
|
|
const stream = 0
|
|
const stream2 = 1
|
|
br[stream].fillFast()
|
|
br[stream2].fillFast()
|
|
|
|
v := single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 := single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+1] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+2] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+3] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
}
|
|
|
|
{
|
|
const stream = 2
|
|
const stream2 = 3
|
|
br[stream].fillFast()
|
|
br[stream2].fillFast()
|
|
|
|
v := single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 := single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+1] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+2] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+3] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
}
|
|
|
|
off += 4
|
|
|
|
if off == bufoff {
|
|
if bufoff > dstEvery {
|
|
return nil, errors.New("corruption detected: stream overrun 1")
|
|
}
|
|
copy(out, buf[:bufoff])
|
|
copy(out[dstEvery:], buf[bufoff:bufoff*2])
|
|
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
|
|
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
|
|
off = 0
|
|
out = out[bufoff:]
|
|
decoded += 256
|
|
// There must at least be 3 buffers left.
|
|
if len(out) < dstEvery*3 {
|
|
return nil, errors.New("corruption detected: stream overrun 2")
|
|
}
|
|
}
|
|
}
|
|
if off > 0 {
|
|
ioff := int(off)
|
|
if len(out) < dstEvery*3+ioff {
|
|
return nil, errors.New("corruption detected: stream overrun 3")
|
|
}
|
|
copy(out, buf[:off])
|
|
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
|
|
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
|
|
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
|
|
decoded += int(off) * 4
|
|
out = out[off:]
|
|
}
|
|
|
|
// Decode remaining.
|
|
for i := range br {
|
|
offset := dstEvery * i
|
|
br := &br[i]
|
|
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
|
|
for bitsLeft > 0 {
|
|
if br.finished() {
|
|
return nil, io.ErrUnexpectedEOF
|
|
}
|
|
if br.bitsRead >= 56 {
|
|
if br.off >= 4 {
|
|
v := br.in[br.off-4:]
|
|
v = v[:4]
|
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
br.value |= uint64(low) << (br.bitsRead - 32)
|
|
br.bitsRead -= 32
|
|
br.off -= 4
|
|
} else {
|
|
for br.off > 0 {
|
|
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
|
|
br.bitsRead -= 8
|
|
br.off--
|
|
}
|
|
}
|
|
}
|
|
// end inline...
|
|
if offset >= len(out) {
|
|
return nil, errors.New("corruption detected: stream overrun 4")
|
|
}
|
|
|
|
// Read value and increment offset.
|
|
v := single[br.peekByteFast()>>shift].entry
|
|
nBits := uint8(v)
|
|
br.advance(nBits)
|
|
bitsLeft -= int(nBits)
|
|
out[offset] = uint8(v >> 8)
|
|
offset++
|
|
}
|
|
decoded += offset - dstEvery*i
|
|
err = br.close()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if dstSize != decoded {
|
|
return nil, errors.New("corruption detected: short output block")
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
// Decompress4X will decompress a 4X encoded stream.
|
|
// The length of the supplied input must match the end of a block exactly.
|
|
// The *capacity* of the dst slice must match the destination size of
|
|
// the uncompressed data exactly.
|
|
func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
|
|
var br [4]bitReaderBytes
|
|
start := 6
|
|
for i := 0; i < 3; i++ {
|
|
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
|
if start+length >= len(src) {
|
|
return nil, errors.New("truncated input (or invalid offset)")
|
|
}
|
|
err := br[i].init(src[start : start+length])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
start += length
|
|
}
|
|
err := br[3].init(src[start:])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// destination, offset to match first output
|
|
dstSize := cap(dst)
|
|
dst = dst[:dstSize]
|
|
out := dst
|
|
dstEvery := (dstSize + 3) / 4
|
|
|
|
const shift = 0
|
|
const tlSize = 1 << 8
|
|
const tlMask = tlSize - 1
|
|
single := d.dt.single[:tlSize]
|
|
|
|
// Use temp table to avoid bound checks/append penalty.
|
|
var buf [256]byte
|
|
var off uint8
|
|
var decoded int
|
|
|
|
// Decode 4 values from each decoder/loop.
|
|
const bufoff = 256 / 4
|
|
for {
|
|
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
|
break
|
|
}
|
|
|
|
{
|
|
// Interleave 2 decodes.
|
|
const stream = 0
|
|
const stream2 = 1
|
|
br[stream].fillFast()
|
|
br[stream2].fillFast()
|
|
|
|
v := single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 := single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+1] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+2] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+3] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
}
|
|
|
|
{
|
|
const stream = 2
|
|
const stream2 = 3
|
|
br[stream].fillFast()
|
|
br[stream2].fillFast()
|
|
|
|
v := single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 := single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+1] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+2] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
|
|
v = single[br[stream].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream+3] = uint8(v >> 8)
|
|
br[stream].advance(uint8(v))
|
|
|
|
v2 = single[br[stream2].peekByteFast()>>shift].entry
|
|
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
|
|
br[stream2].advance(uint8(v2))
|
|
}
|
|
|
|
off += 4
|
|
|
|
if off == bufoff {
|
|
if bufoff > dstEvery {
|
|
return nil, errors.New("corruption detected: stream overrun 1")
|
|
}
|
|
copy(out, buf[:bufoff])
|
|
copy(out[dstEvery:], buf[bufoff:bufoff*2])
|
|
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
|
|
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
|
|
off = 0
|
|
out = out[bufoff:]
|
|
decoded += 256
|
|
// There must at least be 3 buffers left.
|
|
if len(out) < dstEvery*3 {
|
|
return nil, errors.New("corruption detected: stream overrun 2")
|
|
}
|
|
}
|
|
}
|
|
if off > 0 {
|
|
ioff := int(off)
|
|
if len(out) < dstEvery*3+ioff {
|
|
return nil, errors.New("corruption detected: stream overrun 3")
|
|
}
|
|
copy(out, buf[:off])
|
|
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
|
|
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
|
|
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
|
|
decoded += int(off) * 4
|
|
out = out[off:]
|
|
}
|
|
|
|
// Decode remaining.
|
|
for i := range br {
|
|
offset := dstEvery * i
|
|
br := &br[i]
|
|
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
|
|
for bitsLeft > 0 {
|
|
if br.finished() {
|
|
return nil, io.ErrUnexpectedEOF
|
|
}
|
|
if br.bitsRead >= 56 {
|
|
if br.off >= 4 {
|
|
v := br.in[br.off-4:]
|
|
v = v[:4]
|
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
br.value |= uint64(low) << (br.bitsRead - 32)
|
|
br.bitsRead -= 32
|
|
br.off -= 4
|
|
} else {
|
|
for br.off > 0 {
|
|
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
|
|
br.bitsRead -= 8
|
|
br.off--
|
|
}
|
|
}
|
|
}
|
|
// end inline...
|
|
if offset >= len(out) {
|
|
return nil, errors.New("corruption detected: stream overrun 4")
|
|
}
|
|
|
|
// Read value and increment offset.
|
|
v := single[br.peekByteFast()>>shift].entry
|
|
nBits := uint8(v)
|
|
br.advance(nBits)
|
|
bitsLeft -= int(nBits)
|
|
out[offset] = uint8(v >> 8)
|
|
offset++
|
|
}
|
|
decoded += offset - dstEvery*i
|
|
err = br.close()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if dstSize != decoded {
|
|
return nil, errors.New("corruption detected: short output block")
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
// matches will compare a decoding table to a coding table.
|
|
// Errors are written to the writer.
|
|
// Nothing will be written if table is ok.
|
|
func (s *Scratch) matches(ct cTable, w io.Writer) {
|
|
if s == nil || len(s.dt.single) == 0 {
|
|
return
|
|
}
|
|
dt := s.dt.single[:1<<s.actualTableLog]
|
|
tablelog := s.actualTableLog
|
|
ok := 0
|
|
broken := 0
|
|
for sym, enc := range ct {
|
|
errs := 0
|
|
broken++
|
|
if enc.nBits == 0 {
|
|
for _, dec := range dt {
|
|
if uint8(dec.entry>>8) == byte(sym) {
|
|
fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
|
|
errs++
|
|
break
|
|
}
|
|
}
|
|
if errs == 0 {
|
|
broken--
|
|
}
|
|
continue
|
|
}
|
|
// Unused bits in input
|
|
ub := tablelog - enc.nBits
|
|
top := enc.val << ub
|
|
// decoder looks at top bits.
|
|
dec := dt[top]
|
|
if uint8(dec.entry) != enc.nBits {
|
|
fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
|
|
errs++
|
|
}
|
|
if uint8(dec.entry>>8) != uint8(sym) {
|
|
fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
|
|
errs++
|
|
}
|
|
if errs > 0 {
|
|
fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
|
|
continue
|
|
}
|
|
// Ensure that all combinations are covered.
|
|
for i := uint16(0); i < (1 << ub); i++ {
|
|
vval := top | i
|
|
dec := dt[vval]
|
|
if uint8(dec.entry) != enc.nBits {
|
|
fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
|
|
errs++
|
|
}
|
|
if uint8(dec.entry>>8) != uint8(sym) {
|
|
fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
|
|
errs++
|
|
}
|
|
if errs > 20 {
|
|
fmt.Fprintf(w, "%d errros, stopping\n", errs)
|
|
break
|
|
}
|
|
}
|
|
if errs == 0 {
|
|
ok++
|
|
broken--
|
|
}
|
|
}
|
|
if broken > 0 {
|
|
fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
|
|
}
|
|
}
|