fix(deps): update module github.com/containers/storage to v1.57.0

Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
This commit is contained in:
renovate[bot]
2025-01-30 18:12:33 +00:00
committed by GitHub
parent 8380f284c7
commit 85fa4dff42
68 changed files with 1852 additions and 455 deletions

View File

@@ -0,0 +1,163 @@
package dedup
import (
"crypto/sha256"
"encoding/binary"
"errors"
"fmt"
"hash/crc64"
"io/fs"
"sync"
"github.com/opencontainers/selinux/pkg/pwalkdir"
"github.com/sirupsen/logrus"
)
// notSupported is the sentinel error used when deduplication cannot be
// performed. DedupDirs detects it with errors.Is and stops walking without
// reporting a failure.
var notSupported = errors.New("reflinks are not supported on this platform")
// Supported values for DedupOptions.HashMethod.
const (
// DedupHashInvalid is the zero value; getFileChecksum rejects it with an error.
DedupHashInvalid DedupHashMethod = iota
// DedupHashCRC groups files by the CRC-64 (ECMA polynomial) of their content.
DedupHashCRC
// DedupHashFileSize groups files by their size only; the content is not read.
DedupHashFileSize
// DedupHashSHA256 groups files by the SHA-256 digest of their content.
DedupHashSHA256
)
// DedupHashMethod selects how a file is fingerprinted when looking for
// deduplication candidates.
type DedupHashMethod int
// DedupOptions configures a DedupDirs run.
type DedupOptions struct {
// HashMethod is the hash function to use to find identical files
HashMethod DedupHashMethod
}
// DedupResult reports the outcome of a DedupDirs run.
type DedupResult struct {
// Deduped represents the total number of bytes saved by deduplication.
// This value accounts also for all previously deduplicated data, not only the savings
// from the last run.
Deduped uint64
}
// getFileChecksum computes the key used to group candidate files together.
//
// The returned string is an opaque map key, not a printable digest: for the
// content-based methods it holds the raw digest bytes (32 bytes for SHA-256,
// 8 big-endian bytes for CRC-64/ECMA). For DedupHashFileSize it is the decimal
// file size taken from info, and the file is not opened at all.
//
// DedupHashInvalid and any value outside the known set yield an error, as do
// failures reported by readAllFile while reading the file at path.
func getFileChecksum(hashMethod DedupHashMethod, path string, info fs.FileInfo) (string, error) {
	switch hashMethod {
	case DedupHashFileSize:
		return fmt.Sprintf("%v", info.Size()), nil
	case DedupHashCRC:
		return readAllFile(path, info, func(content []byte) (string, error) {
			sum := crc64.Checksum(content, crc64.MakeTable(crc64.ECMA))
			var raw [8]byte
			binary.BigEndian.PutUint64(raw[:], sum)
			return string(raw[:]), nil
		})
	case DedupHashSHA256:
		return readAllFile(path, info, func(content []byte) (string, error) {
			digest := sha256.Sum256(content)
			return string(digest[:]), nil
		})
	case DedupHashInvalid:
		return "", fmt.Errorf("invalid hash method: %v", hashMethod)
	}
	return "", fmt.Errorf("unknown hash method: %v", hashMethod)
}
// pathsLocked holds the paths of the files seen so far that share one checksum
// and could not be deduplicated against an earlier entry.
type pathsLocked struct {
// paths are the candidate source files for this checksum.
paths []string
// lock is held by DedupDirs while it iterates over or appends to paths.
lock sync.Mutex
}
// DedupDirs walks every directory in dirs and tries to deduplicate the regular,
// non-empty files it finds against previously seen files that have the same
// checksum, computed with options.HashMethod.
//
// Each inode is processed at most once, so additional hard links to a file that
// was already handled are skipped. A file that could not be deduplicated
// against any known candidate is itself remembered as a candidate for later
// files with the same checksum.
//
// It returns the number of bytes reported as deduplicated. When the
// deduplication operation reports that it is not supported while walking, the
// walk stops and the result gathered so far is returned with a nil error. An
// error from newDedupFiles, or any other error hit while walking, is returned
// to the caller together with the partial result.
func DedupDirs(dirs []string, options DedupOptions) (DedupResult, error) {
	res := DedupResult{}
	hashToPaths := make(map[string]*pathsLocked)
	lock := sync.Mutex{} // protects `hashToPaths` and `res`

	dedup, err := newDedupFiles()
	if err != nil {
		return res, err
	}

	for _, dir := range dirs {
		logrus.Debugf("Deduping directory %s", dir)
		// The callback is written to be safe for concurrent invocation: all
		// shared state is accessed under `lock` or the per-checksum lock.
		if err := pwalkdir.Walk(dir, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return err
			}
			if !d.Type().IsRegular() {
				return nil
			}
			info, err := d.Info()
			if err != nil {
				return err
			}
			size := uint64(info.Size())
			if size == 0 {
				// do not bother with empty files
				return nil
			}

			// Skip inodes that were already processed (e.g. through another
			// hard link). Note that isFirstVisitOf reports true when the
			// inode was seen before.
			if visited, err := dedup.isFirstVisitOf(info); err != nil {
				return err
			} else if visited {
				return nil
			}

			h, err := getFileChecksum(options.HashMethod, path, info)
			if err != nil {
				return err
			}

			lock.Lock()
			item, foundItem := hashToPaths[h]
			if !foundItem {
				// First file with this checksum: remember it as a candidate.
				item = &pathsLocked{paths: []string{path}}
				hashToPaths[h] = item
				lock.Unlock()
				return nil
			}
			// Take the per-checksum lock before releasing the global one so
			// that nobody else can slip in between and modify item.paths.
			item.lock.Lock()
			lock.Unlock()

			dedupBytes, err := func() (uint64, error) { // function to have a scope for the defer statement
				defer item.lock.Unlock()

				var dedupBytes uint64
				for _, src := range item.paths {
					deduped, err := dedup.dedup(src, path, info)
					if err != nil {
						logrus.Debugf("Failed to deduplicate: %v", err)
						if errors.Is(err, notSupported) {
							return dedupBytes, err
						}
						continue
					}
					if deduped > 0 {
						logrus.Debugf("Deduped %q -> %q (%d bytes)", src, path, deduped)
						dedupBytes += deduped
						break
					}
					// No error and nothing deduplicated (e.g. same inode or
					// different content): silently try the next candidate.
				}
				if dedupBytes == 0 {
					// Nothing matched: keep this file as a candidate of its own.
					item.paths = append(item.paths, path)
				}
				return dedupBytes, nil
			}()
			if err != nil {
				return err
			}

			lock.Lock()
			res.Deduped += dedupBytes
			lock.Unlock()
			return nil
		}); err != nil {
			// if reflinks are not supported, return immediately without errors
			if errors.Is(err, notSupported) {
				return res, nil
			}
			return res, err
		}
	}
	return res, nil
}

View File

@@ -0,0 +1,139 @@
package dedup
import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"sync"
"syscall"
"golang.org/x/sys/unix"
)
// deviceInodePair identifies a file by its device and inode numbers; it is the
// key type of dedupFiles.visitedInodes.
type deviceInodePair struct {
dev uint64
ino uint64
}
// dedupFiles carries the state shared by the deduplication helpers: the set of
// inodes that were already processed.
type dedupFiles struct {
// lock guards visitedInodes.
lock sync.Mutex
// visitedInodes is the set of (device, inode) pairs recorded so far.
visitedInodes map[deviceInodePair]struct{}
}
// newDedupFiles returns a dedupFiles with an empty set of visited inodes.
// On this platform it never fails; the error result is always nil.
func newDedupFiles() (*dedupFiles, error) {
	d := &dedupFiles{}
	d.visitedInodes = make(map[deviceInodePair]struct{})
	return d, nil
}
// recordInode adds the (dev, ino) pair to the set of visited inodes and reports
// whether the pair was already present before the call. The returned error is
// always nil.
func (d *dedupFiles) recordInode(dev, ino uint64) (bool, error) {
	key := deviceInodePair{dev: dev, ino: ino}

	d.lock.Lock()
	defer d.lock.Unlock()

	if _, seen := d.visitedInodes[key]; seen {
		return true, nil
	}
	d.visitedInodes[key] = struct{}{}
	return false, nil
}
// isFirstVisitOf marks the inode behind fi as visited.
//
// Despite its name, the result is true when the inode had ALREADY been
// recorded by an earlier call, and false on the first visit. An error is
// returned when fi does not carry a *syscall.Stat_t.
func (d *dedupFiles) isFirstVisitOf(fi fs.FileInfo) (bool, error) {
	if stat, ok := fi.Sys().(*syscall.Stat_t); ok {
		return d.recordInode(uint64(stat.Dev), stat.Ino)
	}
	return false, fmt.Errorf("unable to get raw syscall.Stat_t data")
}
// dedup asks the kernel (FIDEDUPERANGE ioctl) to make the file at dst share
// its data with the file at src, for the whole length of src.
//
// fiDst is the already known file information for dst; it is only used to
// detect that src and dst are the same inode (hard links), in which case there
// is nothing to do and (0, nil) is returned.
//
// It returns the number of bytes the kernel reports as deduplicated, which is
// zero when nothing was shared. notSupported is returned when the ioctl fails
// with ENOTSUP. Any other ioctl failure, and any negative per-destination
// status reported by the kernel, is returned as a wrapped error.
func (d *dedupFiles) dedup(src, dst string, fiDst fs.FileInfo) (uint64, error) {
	srcFile, err := os.OpenFile(src, os.O_RDONLY, 0)
	if err != nil {
		return 0, fmt.Errorf("failed to open source file: %w", err)
	}
	defer srcFile.Close()

	dstFile, err := os.OpenFile(dst, os.O_WRONLY, 0)
	if err != nil {
		return 0, fmt.Errorf("failed to open destination file: %w", err)
	}
	defer dstFile.Close()

	stSrc, err := srcFile.Stat()
	if err != nil {
		return 0, fmt.Errorf("failed to stat source file: %w", err)
	}
	sSrc, ok := stSrc.Sys().(*syscall.Stat_t)
	if !ok {
		return 0, fmt.Errorf("unable to get raw syscall.Stat_t data")
	}
	sDest, ok := fiDst.Sys().(*syscall.Stat_t)
	if !ok {
		return 0, fmt.Errorf("unable to get raw syscall.Stat_t data")
	}
	if sSrc.Dev == sDest.Dev && sSrc.Ino == sDest.Ino {
		// same inode, we are dealing with a hard link, no need to deduplicate
		return 0, nil
	}

	value := unix.FileDedupeRange{
		Src_offset: 0,
		Src_length: uint64(stSrc.Size()),
		Info: []unix.FileDedupeRangeInfo{
			{
				Dest_fd:     int64(dstFile.Fd()),
				Dest_offset: 0,
			},
		},
	}
	if err := unix.IoctlFileDedupeRange(int(srcFile.Fd()), &value); err != nil {
		if errors.Is(err, unix.ENOTSUP) {
			return 0, notSupported
		}
		return 0, fmt.Errorf("failed to clone file %q: %w", src, err)
	}

	// A successful ioctl only means the request was processed. The outcome for
	// the destination is reported separately: a negative status is an errno
	// for that destination and must not be mistaken for "0 bytes, no error".
	if status := value.Info[0].Status; status < 0 {
		return 0, fmt.Errorf("failed to deduplicate %q into %q: %w", src, dst, syscall.Errno(-status))
	}
	return uint64(value.Info[0].Bytes_deduped), nil
}
// readAllFile hands the whole content of the file at path to fn and returns
// whatever fn returns.
//
// The size is taken from info, not from a fresh stat. An empty file is never
// opened: fn is called with a nil slice. Files smaller than 4096 bytes are read
// into a heap buffer; larger files are mapped read-only into memory and
// unmapped again once fn has returned, so fn must not retain the slice.
func readAllFile(path string, info fs.FileInfo, fn func([]byte) (string, error)) (string, error) {
	length := info.Size()
	if length == 0 {
		return fn(nil)
	}

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	if length >= 4096 {
		mapped, err := unix.Mmap(int(f.Fd()), 0, int(length), unix.PROT_READ, unix.MAP_PRIVATE)
		if err != nil {
			return "", fmt.Errorf("failed to mmap file: %w", err)
		}
		defer func() {
			_ = unix.Munmap(mapped)
		}()

		// Best-effort hint to the kernel; a failure here is harmless.
		_ = unix.Madvise(mapped, unix.MADV_SEQUENTIAL)

		return fn(mapped)
	}

	// small file, read it all
	buf := make([]byte, length)
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", err
	}
	return fn(buf)
}

View File

@@ -0,0 +1,27 @@
//go:build !linux
package dedup
import (
"io/fs"
)
// dedupFiles is the placeholder used on platforms other than Linux; every
// operation on it reports notSupported.
type dedupFiles struct{}
// newDedupFiles always fails with notSupported on this platform and returns a
// nil *dedupFiles.
func newDedupFiles() (*dedupFiles, error) {
return nil, notSupported
}
// isFirstVisitOf is the stub for platforms other than Linux: it records
// nothing and always returns false together with notSupported.
func (d *dedupFiles) isFirstVisitOf(fi fs.FileInfo) (bool, error) {
return false, notSupported
}
// dedup is the stub for platforms other than Linux: it touches neither src nor
// dst and always returns 0 bytes together with notSupported.
func (d *dedupFiles) dedup(src, dst string, fiDst fs.FileInfo) (uint64, error) {
return 0, notSupported
}
// readAllFile is the stub for platforms other than Linux: it never reads the
// file or calls fn, and always returns an empty string with notSupported.
func readAllFile(path string, info fs.FileInfo, fn func([]byte) (string, error)) (string, error) {
return "", notSupported
}