Bump github.com/containers/storage from 1.37.0 to 1.38.0

Bumps [github.com/containers/storage](https://github.com/containers/storage) from 1.37.0 to 1.38.0.
- [Release notes](https://github.com/containers/storage/releases)
- [Changelog](https://github.com/containers/storage/blob/main/docs/containers-storage-changes.md)
- [Commits](https://github.com/containers/storage/compare/v1.37.0...v1.38.0)
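
For context, the change to go.mod itself is the usual one-line version bump (sketch; go.sum is regenerated to match):

```
require github.com/containers/storage v1.38.0 // was v1.37.0
```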

---
updated-dependencies:
- dependency-name: github.com/containers/storage
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Authored by dependabot[bot] on 2022-01-20 09:10:51 +00:00 and committed via GitHub.
Commit 1bf18b7ef8 (parent df4d82b960).
156 changed files with 6822 additions and 1942 deletions.


@@ -0,0 +1,630 @@
package chunked
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
"unsafe"
storage "github.com/containers/storage"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
jsoniter "github.com/json-iterator/go"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
const (
cacheKey = "chunked-manifest-cache"
cacheVersion = 1
)
type metadata struct {
tagLen int
digestLen int
tags []byte
vdata []byte
}
type layer struct {
id string
metadata *metadata
target string
}
type layersCache struct {
layers []layer
refs int
store storage.Store
mutex sync.RWMutex
created time.Time
}
var cacheMutex sync.Mutex
var cache *layersCache
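// cache is a package-level singleton: getLayersCacheRef hands out refcounted
// references, reusing the current cache only for the same store and for at
// most 10 minutes; release drops a reference and discards the cache at zero.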
func (c *layersCache) release() {
cacheMutex.Lock()
defer cacheMutex.Unlock()
c.refs--
if c.refs == 0 {
cache = nil
}
}
func getLayersCacheRef(store storage.Store) *layersCache {
cacheMutex.Lock()
defer cacheMutex.Unlock()
if cache != nil && cache.store == store && time.Since(cache.created).Minutes() < 10 {
cache.refs++
return cache
}
// store the new cache in the package-level variable so subsequent callers share it
cache = &layersCache{
store: store,
refs: 1,
created: time.Now(),
}
return cache
}
func getLayersCache(store storage.Store) (*layersCache, error) {
c := getLayersCacheRef(store)
if err := c.load(); err != nil {
c.release()
return nil, err
}
return c, nil
}
func (c *layersCache) load() error {
c.mutex.Lock()
defer c.mutex.Unlock()
allLayers, err := c.store.Layers()
if err != nil {
return err
}
existingLayers := make(map[string]string)
for _, r := range c.layers {
existingLayers[r.id] = r.target
}
currentLayers := make(map[string]string)
for _, r := range allLayers {
currentLayers[r.ID] = r.ID
if _, found := existingLayers[r.ID]; found {
continue
}
bigData, err := c.store.LayerBigData(r.ID, cacheKey)
if err != nil {
if errors.Cause(err) == os.ErrNotExist {
continue
}
return err
}
defer bigData.Close()
metadata, err := readMetadataFromCache(bigData)
if err != nil {
logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
}
if metadata != nil {
c.addLayer(r.ID, metadata)
continue
}
manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
if err != nil {
continue
}
defer manifestReader.Close()
manifest, err := ioutil.ReadAll(manifestReader)
if err != nil {
return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
}
metadata, err = writeCache(manifest, r.ID, c.store)
if err == nil {
c.addLayer(r.ID, metadata)
}
}
var newLayers []layer
for _, l := range c.layers {
if _, found := currentLayers[l.id]; found {
newLayers = append(newLayers, l)
}
}
c.layers = newLayers
return nil
}
// calculateHardLinkFingerprint calculates a hash that can be used to verify if a file
// is usable for deduplication with hardlinks.
// To calculate the digest, it uses the file payload digest, UID, GID, mode and xattrs.
func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
digester := digest.Canonical.Digester()
modeString := fmt.Sprintf("%d:%d:%o", f.UID, f.GID, f.Mode)
hash := digester.Hash()
if _, err := hash.Write([]byte(f.Digest)); err != nil {
return "", err
}
if _, err := hash.Write([]byte(modeString)); err != nil {
return "", err
}
if len(f.Xattrs) > 0 {
keys := make([]string, 0, len(f.Xattrs))
for k := range f.Xattrs {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
if _, err := hash.Write([]byte(k)); err != nil {
return "", err
}
if _, err := hash.Write([]byte(f.Xattrs[k])); err != nil {
return "", err
}
}
}
return string(digester.Digest()), nil
}
// generateFileLocation generates a file location in the form $OFFSET@$PATH
func generateFileLocation(path string, offset uint64) []byte {
return []byte(fmt.Sprintf("%d@%s", offset, path))
}
// generateTag generates a tag in the form $DIGEST$OFFSET@LEN.
// The [OFFSET; LEN] pair points into the variable-length data where the file
// locations are stored. $DIGEST has the length digestLen stored in the metadata file header.
func generateTag(digest string, offset, len uint64) string {
return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len)
}
type setBigData interface {
// SetLayerBigData stores a (possibly large) chunk of named data
SetLayerBigData(id, key string, data io.Reader) error
}
// writeCache writes the cache for the layer with the given ID.
// It generates a sorted list of digests with their offset to the path location and offset.
// The same cache is used to look up files, chunks and candidates for deduplication with hard links.
// There are three kinds of digests stored:
// - digest(file.payload)
// - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
// - digest(i) for each i in chunks(file payload)
func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) {
var vdata bytes.Buffer
tagLen := 0
digestLen := 0
var tagsBuffer bytes.Buffer
toc, err := prepareMetadata(manifest)
if err != nil {
return nil, err
}
var tags []string
for _, k := range toc {
if k.Digest != "" {
location := generateFileLocation(k.Name, 0)
off := uint64(vdata.Len())
l := uint64(len(location))
d := generateTag(k.Digest, off, l)
if tagLen == 0 {
tagLen = len(d)
}
if tagLen != len(d) {
return nil, errors.New("digest with different length found")
}
tags = append(tags, d)
fp, err := calculateHardLinkFingerprint(k)
if err != nil {
return nil, err
}
d = generateTag(fp, off, l)
if tagLen != len(d) {
return nil, errors.New("digest with different length found")
}
tags = append(tags, d)
if _, err := vdata.Write(location); err != nil {
return nil, err
}
digestLen = len(k.Digest)
}
if k.ChunkDigest != "" {
location := generateFileLocation(k.Name, uint64(k.ChunkOffset))
off := uint64(vdata.Len())
l := uint64(len(location))
d := generateTag(k.ChunkDigest, off, l)
if tagLen == 0 {
tagLen = len(d)
}
if tagLen != len(d) {
return nil, errors.New("digest with different length found")
}
tags = append(tags, d)
if _, err := vdata.Write(location); err != nil {
return nil, err
}
digestLen = len(k.ChunkDigest)
}
}
sort.Strings(tags)
for _, t := range tags {
if _, err := tagsBuffer.Write([]byte(t)); err != nil {
return nil, err
}
}
pipeReader, pipeWriter := io.Pipe()
errChan := make(chan error, 1)
go func() {
defer pipeWriter.Close()
defer close(errChan)
// version
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(cacheVersion)); err != nil {
errChan <- err
return
}
// len of a tag
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagLen)); err != nil {
errChan <- err
return
}
// len of a digest
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(digestLen)); err != nil {
errChan <- err
return
}
// tags length
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
errChan <- err
return
}
// vdata length
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(vdata.Len())); err != nil {
errChan <- err
return
}
// tags
if _, err := pipeWriter.Write(tagsBuffer.Bytes()); err != nil {
errChan <- err
return
}
// variable length data
if _, err := pipeWriter.Write(vdata.Bytes()); err != nil {
errChan <- err
return
}
errChan <- nil
}()
defer pipeReader.Close()
counter := ioutils.NewWriteCounter(ioutil.Discard)
r := io.TeeReader(pipeReader, counter)
if err := dest.SetLayerBigData(id, cacheKey, r); err != nil {
return nil, err
}
if err := <-errChan; err != nil {
return nil, err
}
logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)
return &metadata{
digestLen: digestLen,
tagLen: tagLen,
tags: tagsBuffer.Bytes(),
vdata: vdata.Bytes(),
}, nil
}
func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
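// layout, as produced by writeCache: five little-endian uint64 header fields
// (version, tag length, digest length, tags block size, vdata block size)
// followed by the tags block and the variable-length data block.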
var version, tagLen, digestLen, tagsLen, vdataLen uint64
if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
return nil, err
}
if version != cacheVersion {
return nil, nil
}
if err := binary.Read(bigData, binary.LittleEndian, &tagLen); err != nil {
return nil, err
}
if err := binary.Read(bigData, binary.LittleEndian, &digestLen); err != nil {
return nil, err
}
if err := binary.Read(bigData, binary.LittleEndian, &tagsLen); err != nil {
return nil, err
}
if err := binary.Read(bigData, binary.LittleEndian, &vdataLen); err != nil {
return nil, err
}
tags := make([]byte, tagsLen)
if _, err := io.ReadFull(bigData, tags); err != nil {
return nil, err
}
vdata := make([]byte, vdataLen)
if _, err := io.ReadFull(bigData, vdata); err != nil {
return nil, err
}
return &metadata{
tagLen: int(tagLen),
digestLen: int(digestLen),
tags: tags,
vdata: vdata,
}, nil
}
func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
toc, err := unmarshalToc(manifest)
if err != nil {
// ignore errors here. They might be caused by a different manifest format.
return nil, nil
}
var r []*internal.FileMetadata
chunkSeen := make(map[string]bool)
for i := range toc.Entries {
d := toc.Entries[i].Digest
if d != "" {
r = append(r, &toc.Entries[i])
continue
}
// chunks do not use hard link dedup so keeping just one candidate is enough
cd := toc.Entries[i].ChunkDigest
if cd != "" && !chunkSeen[cd] {
r = append(r, &toc.Entries[i])
chunkSeen[cd] = true
}
}
return r, nil
}
func (c *layersCache) addLayer(id string, metadata *metadata) error {
target, err := c.store.DifferTarget(id)
if err != nil {
return fmt.Errorf("get checkout directory layer %q: %w", id, err)
}
l := layer{
id: id,
metadata: metadata,
target: target,
}
c.layers = append(c.layers, l)
return nil
}
func byteSliceAsString(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}
func findTag(digest string, metadata *metadata) (string, uint64, uint64) {
if len(digest) != metadata.digestLen {
return "", 0, 0
}
nElements := len(metadata.tags) / metadata.tagLen
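// tags is a concatenation of fixed-width records of tagLen bytes, each
// starting with the digest, so a binary search over the digest prefix can
// work directly on the byte slice.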
i := sort.Search(nElements, func(i int) bool {
d := byteSliceAsString(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+metadata.digestLen])
return strings.Compare(d, digest) >= 0
})
if i < nElements {
d := string(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+len(digest)])
if digest == d {
startOff := i*metadata.tagLen + metadata.digestLen
parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@")
off, _ := strconv.ParseInt(parts[0], 10, 64)
len, _ := strconv.ParseInt(parts[1], 10, 64)
return digest, uint64(off), uint64(len)
}
}
return "", 0, 0
}
func (c *layersCache) findDigestInternal(digest string) (string, string, int64, error) {
if digest == "" {
return "", "", -1, nil
}
c.mutex.RLock()
defer c.mutex.RUnlock()
for _, layer := range c.layers {
digest, off, len := findTag(digest, layer.metadata)
if digest != "" {
position := string(layer.metadata.vdata[off : off+len])
parts := strings.SplitN(position, "@", 2)
offFile, _ := strconv.ParseInt(parts[0], 10, 64)
return layer.target, parts[1], offFile, nil
}
}
return "", "", -1, nil
}
// findFileInOtherLayers finds the specified file in other layers.
// file is the file to look for.
func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, useHardLinks bool) (string, string, error) {
digest := file.Digest
if useHardLinks {
var err error
digest, err = calculateHardLinkFingerprint(file)
if err != nil {
return "", "", err
}
}
target, name, off, err := c.findDigestInternal(digest)
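// a whole-file entry is recorded at offset 0 (generateFileLocation is called
// with offset 0 for file digests), so a non-zero offset is a chunk match and
// cannot be reused as a complete file.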
if off == 0 {
return target, name, err
}
return "", "", nil
}
func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (string, string, int64, error) {
return c.findDigestInternal(chunk.ChunkDigest)
}
func unmarshalToc(manifest []byte) (*internal.TOC, error) {
var buf bytes.Buffer
count := 0
var toc internal.TOC
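// first pass: count the bytes of every string that will be retained, so buf
// can be grown once and getString can return views into a single backing
// array instead of allocating a new string for each field.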
iter := jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
if field != "entries" {
iter.Skip()
continue
}
for iter.ReadArray() {
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
switch field {
case "type", "name", "linkName", "digest", "chunkDigest", "chunkType":
count += len(iter.ReadStringAsSlice())
case "xattrs":
for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
count += len(iter.ReadStringAsSlice())
}
default:
iter.Skip()
}
}
}
break
}
buf.Grow(count)
getString := func(b []byte) string {
from := buf.Len()
buf.Write(b)
to := buf.Len()
return byteSliceAsString(buf.Bytes()[from:to])
}
iter = jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
if field == "version" {
toc.Version = iter.ReadInt()
continue
}
if field != "entries" {
iter.Skip()
continue
}
for iter.ReadArray() {
var m internal.FileMetadata
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
switch field {
case "type":
m.Type = getString(iter.ReadStringAsSlice())
case "name":
m.Name = getString(iter.ReadStringAsSlice())
case "linkName":
m.Linkname = getString(iter.ReadStringAsSlice())
case "mode":
m.Mode = iter.ReadInt64()
case "size":
m.Size = iter.ReadInt64()
case "UID":
m.UID = iter.ReadInt()
case "GID":
m.GID = iter.ReadInt()
case "ModTime":
time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
if err != nil {
return nil, err
}
m.ModTime = &time
case "accesstime":
time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
if err != nil {
return nil, err
}
m.AccessTime = &time
case "changetime":
time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
if err != nil {
return nil, err
}
m.ChangeTime = &time
case "devMajor":
m.Devmajor = iter.ReadInt64()
case "devMinor":
m.Devminor = iter.ReadInt64()
case "digest":
m.Digest = getString(iter.ReadStringAsSlice())
case "offset":
m.Offset = iter.ReadInt64()
case "endOffset":
m.EndOffset = iter.ReadInt64()
case "chunkSize":
m.ChunkSize = iter.ReadInt64()
case "chunkOffset":
m.ChunkOffset = iter.ReadInt64()
case "chunkDigest":
m.ChunkDigest = getString(iter.ReadStringAsSlice())
case "chunkType":
m.ChunkType = getString(iter.ReadStringAsSlice())
case "xattrs":
m.Xattrs = make(map[string]string)
for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
value := iter.ReadStringAsSlice()
m.Xattrs[key] = getString(value)
}
default:
iter.Skip()
}
}
toc.Entries = append(toc.Entries, m)
}
break
}
toc.StringsBuf = buf
return &toc, nil
}
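
A quick standalone look at the tag encoding may help here: because generateTag zero-pads offset and length to 20 digits, every tag for a given digest algorithm has identical width, which is what lets findTag binary-search the concatenated tags block as fixed-size records. A sketch (the helper is copied from the diff above; the short digest is illustrative):

```go
package main

import "fmt"

// Same format string as generateTag in the patch.
func generateTag(digest string, offset, length uint64) string {
    return fmt.Sprintf("%s%.20d@%.20d", digest, offset, length)
}

func main() {
    t := generateTag("sha256:dead", 42, 17)
    fmt.Println(t)      // sha256:dead00000000000000000042@00000000000000000017
    fmt.Println(len(t)) // 52: len(digest) + 20 + 1 + 20, constant per digest length
}
```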


@@ -5,6 +5,7 @@ package compressor
// larger software like the graph drivers.
import (
"bufio"
"encoding/base64"
"io"
"io/ioutil"
@@ -15,6 +16,189 @@ import (
"github.com/vbatts/tar-split/archive/tar"
)
const RollsumBits = 16
const holesThreshold = int64(1 << 10)
type holesFinder struct {
reader *bufio.Reader
fileOff int64
zeros int64
from int64
threshold int64
state int
}
const (
holesFinderStateRead = iota
holesFinderStateAccumulate
holesFinderStateFound
holesFinderStateEOF
)
// ReadByte reads a single byte from the underlying reader.
// If a single byte is read, the return value is (0, RAW-BYTE-VALUE, nil).
// If there are at least f.threshold consecutive zeros, then the
// return value is (N_CONSECUTIVE_ZEROS, '\x00', nil).
func (f *holesFinder) ReadByte() (int64, byte, error) {
for {
switch f.state {
// reading the file stream
case holesFinderStateRead:
if f.zeros > 0 {
f.zeros--
return 0, 0, nil
}
b, err := f.reader.ReadByte()
if err != nil {
return 0, b, err
}
if b != 0 {
return 0, b, err
}
f.zeros = 1
if f.zeros == f.threshold {
f.state = holesFinderStateFound
} else {
f.state = holesFinderStateAccumulate
}
// accumulating zeros, but still didn't reach the threshold
case holesFinderStateAccumulate:
b, err := f.reader.ReadByte()
if err != nil {
if err == io.EOF {
f.state = holesFinderStateEOF
continue
}
return 0, b, err
}
if b == 0 {
f.zeros++
if f.zeros == f.threshold {
f.state = holesFinderStateFound
}
} else {
if err := f.reader.UnreadByte(); err != nil {
return 0, 0, err
}
f.state = holesFinderStateRead
}
// found a hole. Number of zeros >= threshold
case holesFinderStateFound:
b, err := f.reader.ReadByte()
if err != nil {
if err == io.EOF {
f.state = holesFinderStateEOF
}
holeLen := f.zeros
f.zeros = 0
return holeLen, 0, nil
}
if b != 0 {
if err := f.reader.UnreadByte(); err != nil {
return 0, 0, err
}
f.state = holesFinderStateRead
holeLen := f.zeros
f.zeros = 0
return holeLen, 0, nil
}
f.zeros++
// reached EOF. Flush pending zeros if any.
case holesFinderStateEOF:
if f.zeros > 0 {
f.zeros--
return 0, 0, nil
}
return 0, 0, io.EOF
}
}
}
type rollingChecksumReader struct {
reader *holesFinder
closed bool
rollsum *RollSum
pendingHole int64
// WrittenOut is the total number of payload bytes produced so far,
// i.e. read from the underlying stream and handed to the caller.
// IsLastChunkZeros tells whether the last generated
// chunk is a hole (made of consecutive zeros). If it
// is false, then the last chunk is a data chunk
// generated by the rolling checksum.
IsLastChunkZeros bool
}
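// Read fills b and returns (mustSplit, n, err): mustSplit is true when the
// current chunk must be terminated, either because the rolling checksum hit
// a split point or because a hole (run of zeros) begins or ends here.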
func (rc *rollingChecksumReader) Read(b []byte) (bool, int, error) {
rc.IsLastChunkZeros = false
if rc.pendingHole > 0 {
toCopy := int64(len(b))
if rc.pendingHole < toCopy {
toCopy = rc.pendingHole
}
rc.pendingHole -= toCopy
for i := int64(0); i < toCopy; i++ {
b[i] = 0
}
rc.WrittenOut += toCopy
rc.IsLastChunkZeros = true
// if there are no other zeros left, terminate the chunk
return rc.pendingHole == 0, int(toCopy), nil
}
if rc.closed {
return false, 0, io.EOF
}
for i := 0; i < len(b); i++ {
holeLen, n, err := rc.reader.ReadByte()
if err != nil {
if err == io.EOF {
rc.closed = true
if i == 0 {
return false, 0, err
}
return false, i, nil
}
// Report any other error type
return false, -1, err
}
if holeLen > 0 {
for j := int64(0); j < holeLen; j++ {
rc.rollsum.Roll(0)
}
rc.pendingHole = holeLen
return true, i, nil
}
b[i] = n
rc.WrittenOut++
rc.rollsum.Roll(n)
if rc.rollsum.OnSplitWithBits(RollsumBits) {
return true, i + 1, nil
}
}
return false, len(b), nil
}
type chunk struct {
ChunkOffset int64
Offset int64
Checksum string
ChunkSize int64
ChunkType string
}
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
// total written so far. Used to retrieve partial offsets in the file
dest := ioutils.NewWriteCounter(destFile)
@@ -64,40 +248,78 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
if _, err := zstdWriter.Write(rawBytes); err != nil {
return err
}
payloadDigester := digest.Canonical.Digester()
chunkDigester := digest.Canonical.Digester()
// Now handle the payload, if any
startOffset := int64(0)
lastOffset := int64(0)
lastChunkOffset := int64(0)
checksum := ""
chunks := []chunk{}
hf := &holesFinder{
threshold: holesThreshold,
reader: bufio.NewReader(tr),
}
rcReader := &rollingChecksumReader{
reader: hf,
rollsum: NewRollSum(),
}
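// payloadDigester hashes the whole file payload; chunkDigester is replaced
// at every split so it covers only the current chunk.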
payloadDest := io.MultiWriter(payloadDigester.Hash(), chunkDigester.Hash(), zstdWriter)
for {
mustSplit, read, errRead := rcReader.Read(buf)
if errRead != nil && errRead != io.EOF {
return errRead
}
// restart the compression only if there is a payload.
if read > 0 {
if startOffset == 0 {
startOffset, err = restartCompression()
if err != nil {
return err
}
lastOffset = startOffset
}
if _, err := payloadDest.Write(buf[:read]); err != nil {
return err
}
}
if (mustSplit || errRead == io.EOF) && startOffset > 0 {
off, err := restartCompression()
if err != nil {
return err
}
chunkSize := rcReader.WrittenOut - lastChunkOffset
if chunkSize > 0 {
chunkType := internal.ChunkTypeData
if rcReader.IsLastChunkZeros {
chunkType = internal.ChunkTypeZeros
}
chunks = append(chunks, chunk{
ChunkOffset: lastChunkOffset,
Offset: lastOffset,
Checksum: chunkDigester.Digest().String(),
ChunkSize: chunkSize,
ChunkType: chunkType,
})
}
lastOffset = off
lastChunkOffset = rcReader.WrittenOut
chunkDigester = digest.Canonical.Digester()
payloadDest = io.MultiWriter(payloadDigester.Hash(), chunkDigester.Hash(), zstdWriter)
}
if errRead == io.EOF {
if startOffset > 0 {
checksum = payloadDigester.Digest().String()
}
break
@@ -112,30 +334,42 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
}
entries := []internal.FileMetadata{
{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: &hdr.ModTime,
AccessTime: &hdr.AccessTime,
ChangeTime: &hdr.ChangeTime,
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
Digest: checksum,
Offset: startOffset,
EndOffset: lastOffset,
},
}
for i := 1; i < len(chunks); i++ {
entries = append(entries, internal.FileMetadata{
Type: internal.TypeChunk,
Name: hdr.Name,
ChunkOffset: chunks[i].ChunkOffset,
})
}
if len(chunks) > 1 {
for i := range chunks {
entries[i].ChunkSize = chunks[i].ChunkSize
entries[i].Offset = chunks[i].Offset
entries[i].ChunkDigest = chunks[i].Checksum
entries[i].ChunkType = chunks[i].ChunkType
}
}
metadata = append(metadata, entries...)
}
rawBytes := tr.RawBytes()
@@ -212,7 +446,7 @@ func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level
// ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) {
if level == nil {
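// default zstd level is 10 as of this bump (was 3): better ratio, more CPU.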
l := 10
level = &l
}
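
The holesFinder state machine above detects holes incrementally, one byte at a time, so it can sit under the rolling-checksum reader. The underlying rule is easier to see in batch form; this is a standalone toy sketch (not the patch's API) using the same 1 KiB threshold as holesThreshold:

```go
package main

import (
    "bufio"
    "bytes"
    "fmt"
    "io"
)

// reportRuns mirrors the idea: zero runs of at least threshold bytes become a
// single "hole" event; shorter runs and ordinary bytes pass through as data.
func reportRuns(r *bufio.Reader, threshold int) error {
    zeros := 0
    flush := func() {
        if zeros >= threshold {
            fmt.Printf("hole: %d zeros\n", zeros)
        } else if zeros > 0 {
            fmt.Printf("data: %d literal zero bytes\n", zeros)
        }
        zeros = 0
    }
    for {
        b, err := r.ReadByte()
        if err == io.EOF {
            flush()
            return nil
        }
        if err != nil {
            return err
        }
        if b == 0 {
            zeros++
            continue
        }
        flush()
        fmt.Printf("data: byte %#x\n", b)
    }
}

func main() {
    payload := append([]byte{'a'}, make([]byte, 2048)...) // 'a' + 2 KiB of zeros
    payload = append(payload, 'b')
    _ = reportRuns(bufio.NewReader(bytes.NewReader(payload)), 1024)
}
```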


@@ -0,0 +1,81 @@
/*
Copyright 2011 The Perkeep Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package rollsum implements rolling checksums similar to apenwarr's bup, which
// is similar to librsync.
//
// The bup project is at https://github.com/apenwarr/bup and its splitting in
// particular is at https://github.com/apenwarr/bup/blob/master/lib/bup/bupsplit.c
package compressor
import (
"math/bits"
)
const windowSize = 64 // Roll assumes windowSize is a power of 2
const charOffset = 31
const blobBits = 13
const blobSize = 1 << blobBits // 8k
type RollSum struct {
s1, s2 uint32
window [windowSize]uint8
wofs int
}
func NewRollSum() *RollSum {
return &RollSum{
s1: windowSize * charOffset,
s2: windowSize * (windowSize - 1) * charOffset,
}
}
func (rs *RollSum) add(drop, add uint32) {
s1 := rs.s1 + add - drop
rs.s1 = s1
rs.s2 += s1 - uint32(windowSize)*(drop+charOffset)
}
// Roll adds ch to the rolling sum.
func (rs *RollSum) Roll(ch byte) {
wp := &rs.window[rs.wofs]
rs.add(uint32(*wp), uint32(ch))
*wp = ch
rs.wofs = (rs.wofs + 1) & (windowSize - 1)
}
// OnSplit reports whether at least 13 consecutive trailing bits of
// the current checksum are set the same way.
func (rs *RollSum) OnSplit() bool {
return (rs.s2 & (blobSize - 1)) == ((^0) & (blobSize - 1))
}
// OnSplitWithBits reports whether at least n consecutive trailing bits
// of the current checksum are set the same way.
func (rs *RollSum) OnSplitWithBits(n uint32) bool {
mask := (uint32(1) << n) - 1
return rs.s2&mask == (^uint32(0))&mask
}
func (rs *RollSum) Bits() int {
rsum := rs.Digest() >> (blobBits + 1)
return blobBits + bits.TrailingZeros32(^rsum)
}
func (rs *RollSum) Digest() uint32 {
return (rs.s1 << 16) | (rs.s2 & 0xffff)
}
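
To sanity-check the splitter, one can roll random data through it; with RollsumBits = 16 the split points should arrive on average every ~64 KiB. A sketch, assuming the exported names above are importable from pkg/chunked/compressor as published by the module:

```go
package main

import (
    "fmt"
    "math/rand"

    "github.com/containers/storage/pkg/chunked/compressor"
)

func main() {
    rng := rand.New(rand.NewSource(1)) // deterministic input
    data := make([]byte, 1<<24)        // 16 MiB of pseudo-random bytes
    rng.Read(data)

    rs := compressor.NewRollSum()
    splits := 0
    for _, b := range data {
        rs.Roll(b)
        if rs.OnSplitWithBits(compressor.RollsumBits) {
            splits++
        }
    }
    if splits > 0 {
        fmt.Printf("%d split points, ~%d bytes per chunk\n", splits, len(data)/splits)
    }
}
```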


@@ -8,11 +8,11 @@ import (
"archive/tar"
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"time"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
)
@@ -20,6 +20,9 @@ import (
type TOC struct {
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
// internal: used by unmarshalToc
StringsBuf bytes.Buffer `json:"-"`
}
type FileMetadata struct {
@@ -27,25 +30,33 @@ type FileMetadata struct {
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
Mode int64 `json:"mode,omitempty"`
Size int64 `json:"size,omitempty"`
UID int `json:"uid,omitempty"`
GID int `json:"gid,omitempty"`
ModTime *time.Time `json:"modtime,omitempty"`
AccessTime *time.Time `json:"accesstime,omitempty"`
ChangeTime *time.Time `json:"changetime,omitempty"`
Devmajor int64 `json:"devMajor,omitempty"`
Devminor int64 `json:"devMinor,omitempty"`
Xattrs map[string]string `json:"xattrs,omitempty"`
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkDigest string `json:"chunkDigest,omitempty"`
ChunkType string `json:"chunkType,omitempty"`
// internal: computed by mergeTOCEntries.
Chunks []*FileMetadata `json:"-"`
}
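
The move from time.Time to *time.Time is what makes the new omitempty tags effective: encoding/json (and jsoniter in compatible mode, as used below) never treats a non-pointer struct value as empty, while a nil pointer is skipped. A minimal demonstration:

```go
package main

import (
    "encoding/json"
    "fmt"
    "time"
)

type byValue struct {
    ModTime time.Time `json:"modtime,omitempty"` // never omitted: structs aren't "empty"
}

type byPointer struct {
    ModTime *time.Time `json:"modtime,omitempty"` // omitted when nil
}

func main() {
    a, _ := json.Marshal(byValue{})
    b, _ := json.Marshal(byPointer{})
    fmt.Println(string(a)) // {"modtime":"0001-01-01T00:00:00Z"}
    fmt.Println(string(b)) // {}
}
```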
const (
ChunkTypeData = ""
ChunkTypeZeros = "zeros"
)
const (
TypeReg = "reg"
TypeChunk = "chunk"
@@ -123,6 +134,7 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
Entries: metadata,
}
var json = jsoniter.ConfigCompatibleWithStandardLibrary
// Generate the manifest
manifest, err := json.Marshal(toc)
if err != nil {

File diff suppressed because it is too large.