// mirror of https://github.com/containers/skopeo.git
// synced 2025-05-05 14:37:12 +00:00
// 2643 lines, 86 KiB, Go
package storage
|
||
|
||
import (
|
||
"bytes"
|
||
"errors"
|
||
"fmt"
|
||
"io"
|
||
"maps"
|
||
"math/bits"
|
||
"os"
|
||
"path"
|
||
"path/filepath"
|
||
"reflect"
|
||
"slices"
|
||
"sort"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
drivers "github.com/containers/storage/drivers"
|
||
"github.com/containers/storage/pkg/archive"
|
||
"github.com/containers/storage/pkg/idtools"
|
||
"github.com/containers/storage/pkg/ioutils"
|
||
"github.com/containers/storage/pkg/lockfile"
|
||
"github.com/containers/storage/pkg/mount"
|
||
"github.com/containers/storage/pkg/stringid"
|
||
"github.com/containers/storage/pkg/system"
|
||
"github.com/containers/storage/pkg/tarlog"
|
||
"github.com/containers/storage/pkg/truncindex"
|
||
"github.com/klauspost/pgzip"
|
||
digest "github.com/opencontainers/go-digest"
|
||
"github.com/opencontainers/selinux/go-selinux"
|
||
"github.com/sirupsen/logrus"
|
||
"github.com/vbatts/tar-split/archive/tar"
|
||
"github.com/vbatts/tar-split/tar/asm"
|
||
"github.com/vbatts/tar-split/tar/storage"
|
||
)
|
||
|
||
const (
	// tarSplitSuffix is the suffix of the per-layer, gzip-compressed tar-split
	// metadata file kept alongside the layer data (see tspath).
	tarSplitSuffix = ".tar-split.gz"
	// incompleteFlag is a layer flag marking a layer whose creation has not
	// finished. NOTE(review): the code that sets/clears it is outside this
	// chunk — confirm its exact lifecycle there.
	incompleteFlag = "incomplete"
	// maxLayerStoreCleanupIterations is the number of times we try to clean up inconsistent layer store state
	// in readers (which, for implementation reasons, gives other writers the opportunity to create more inconsistent state)
	// until we just give up.
	maxLayerStoreCleanupIterations = 3
)
// layerLocations is a bitmask identifying which backing JSON file(s) a layer
// record belongs to; values are single bits so they can be OR-ed together.
type layerLocations uint8

// The backing store is split in three json files.
// The volatile store is written without fsync() meaning it isn't as robust to unclean shutdown.
// Optionally, an image store can be configured to store RO layers.
// The stable store is used for the remaining layers that don't go into the other stores.
const (
	stableLayerLocation layerLocations = 1 << iota
	imageStoreLayerLocation
	volatileLayerLocation

	// numLayerLocationIndex is the count of locations above; it sizes arrays
	// that are indexed by indexFromLayerLocation.
	numLayerLocationIndex = iota
)
// layerLocationFromIndex converts an array index (0 .. numLayerLocationIndex-1)
// into the corresponding single-bit layerLocations value.
func layerLocationFromIndex(index int) layerLocations {
	return 1 << index
}
// indexFromLayerLocation is the inverse of layerLocationFromIndex: it maps a
// single-bit layerLocations value back to its array index.
// Callers are expected to pass exactly one set bit; for location == 0 this
// returns bits.UintSize, which is out of range for the location arrays.
func indexFromLayerLocation(location layerLocations) int {
	return bits.TrailingZeros(uint(location))
}
// A Layer is a record of a copy-on-write layer that's stored by the lower
// level graph driver.
//
// Layer is serialized to the per-location layers JSON files; fields tagged
// `json:"-"` are runtime-only and never persisted there.
type Layer struct {
	// ID is either one which was specified at create-time, or a random
	// value which was generated by the library.
	ID string `json:"id"`

	// Names is an optional set of user-defined convenience values. The
	// layer can be referred to by its ID or any of its names. Names are
	// unique among layers.
	Names []string `json:"names,omitempty"`

	// Parent is the ID of a layer from which this layer inherits data.
	Parent string `json:"parent,omitempty"`

	// Metadata is data we keep for the convenience of the caller. It is not
	// expected to be large, since it is kept in memory.
	Metadata string `json:"metadata,omitempty"`

	// MountLabel is an SELinux label which should be used when attempting to mount
	// the layer.
	MountLabel string `json:"mountlabel,omitempty"`

	// MountPoint is the path where the layer is mounted, or where it was most
	// recently mounted.
	//
	// WARNING: This field is a snapshot in time: (except for users inside c/storage that
	// hold the mount lock) the true value can change between subsequent
	// calls to c/storage API.
	//
	// Users that need to handle concurrent mount/unmount attempts should not access this
	// field at all, and should only use the path returned by .Mount() (and that’s only
	// assuming no other user will concurrently decide to unmount that mount point).
	MountPoint string `json:"-"`

	// MountCount is used as a reference count for the container's layer being
	// mounted at the mount point.
	//
	// WARNING: This field is a snapshot in time; (except for users inside c/storage that
	// hold the mount lock) the true value can change between subsequent
	// calls to c/storage API.
	//
	// In situations where concurrent mount/unmount attempts can happen, this field
	// should not be used for any decisions, maybe apart from heuristic user warnings.
	MountCount int `json:"-"`

	// Created is the datestamp for when this layer was created. Older
	// versions of the library did not track this information, so callers
	// will likely want to use the IsZero() method to verify that a value
	// is set before using it.
	Created time.Time `json:"created,omitempty"`

	// CompressedDigest is the digest of the blob that was last passed to
	// ApplyDiff() or create(), as it was presented to us.
	CompressedDigest digest.Digest `json:"compressed-diff-digest,omitempty"`

	// CompressedSize is the length of the blob that was last passed to
	// ApplyDiff() or create(), as it was presented to us. If
	// CompressedDigest is not set, this should be treated as if it were an
	// uninitialized value.
	CompressedSize int64 `json:"compressed-size,omitempty"`

	// UncompressedDigest is the digest of the blob that was last passed to
	// ApplyDiff() or create(), after we decompressed it. Often referred to
	// as a DiffID.
	UncompressedDigest digest.Digest `json:"diff-digest,omitempty"`

	// TOCDigest represents the digest of the Table of Contents (TOC) of the blob.
	// This digest is utilized when the UncompressedDigest is not
	// validated during the partial image pull process, but the
	// TOC itself is validated.
	// It serves as an alternative reference under these specific conditions.
	TOCDigest digest.Digest `json:"toc-digest,omitempty"`

	// UncompressedSize is the length of the blob that was last passed to
	// ApplyDiff() or create(), after we decompressed it.
	//
	// - If UncompressedDigest is set, this must be set to a valid value.
	// - Otherwise, if TOCDigest is set, this is either valid or -1.
	// - If neither of this digests is set, this should be treated as if it were
	//   an uninitialized value.
	UncompressedSize int64 `json:"diff-size,omitempty"`

	// CompressionType is the type of compression which we detected on the blob
	// that was last passed to ApplyDiff() or create().
	CompressionType archive.Compression `json:"compression,omitempty"`

	// UIDs and GIDs are lists of UIDs and GIDs used in the layer. This
	// field is only populated (i.e., will only contain one or more
	// entries) if the layer was created using ApplyDiff() or create().
	UIDs []uint32 `json:"uidset,omitempty"`
	GIDs []uint32 `json:"gidset,omitempty"`

	// Flags is arbitrary data about the layer.
	Flags map[string]any `json:"flags,omitempty"`

	// UIDMap and GIDMap are used for setting up a layer's contents
	// for use inside of a user namespace where UID mapping is being used.
	UIDMap []idtools.IDMap `json:"uidmap,omitempty"`
	GIDMap []idtools.IDMap `json:"gidmap,omitempty"`

	// ReadOnly is true if this layer resides in a read-only layer store.
	ReadOnly bool `json:"-"`

	// location is the location of the store where the layer is present.
	location layerLocations `json:"-"`

	// BigDataNames is a list of names of data items that we keep for the
	// convenience of the caller. They can be large, and are only in
	// memory when being read from or written to disk.
	BigDataNames []string `json:"big-data-names,omitempty"`
}
// layerMountPoint is the serialized form of a layer's mount state — the mount
// path and its reference count — keyed by layer ID.
// NOTE(review): the load/save code is outside this chunk; presumably it is
// persisted in the file named by mountspath() — confirm.
type layerMountPoint struct {
	ID         string `json:"id"`
	MountPoint string `json:"path"`
	MountCount int    `json:"count"`
}
// DiffOptions override the default behavior of Diff() methods.
type DiffOptions struct {
	// Compression, if set, overrides the default compressor when generating a diff.
	Compression *archive.Compression
}
// stagedLayerOptions are the options passed to .create to populate a staged
// layer.
type stagedLayerOptions struct {
	// DiffOutput describes the differ's already-applied output for the staged layer.
	DiffOutput *drivers.DriverWithDifferOutput
	// DiffOptions are the options used when committing the staged diff.
	DiffOptions *drivers.ApplyDiffWithDifferOpts
}
// roLayerStore wraps a graph driver, adding the ability to refer to layers by
// name, and keeping track of parent-child relationships, along with a list of
// all known layers.
type roLayerStore interface {
	roMetadataStore
	roLayerBigDataStore

	// startReading makes sure the store is fresh, and locks it for reading.
	// If this succeeds, the caller MUST call stopReading().
	startReading() error

	// stopReading releases locks obtained by startReading.
	stopReading()

	// Exists checks if a layer with the specified name or ID is known.
	Exists(id string) bool

	// Get retrieves information about a layer given an ID or name.
	Get(id string) (*Layer, error)

	// Status returns a slice of key-value pairs, suitable for human consumption,
	// relaying whatever status information the underlying driver can share.
	Status() ([][2]string, error)

	// Changes returns a slice of Change structures, which contain a pathname
	// (Path) and a description of what sort of change (Kind) was made by the
	// layer (either ChangeModify, ChangeAdd, or ChangeDelete), relative to a
	// specified layer. By default, the layer's parent is used as a reference.
	Changes(from, to string) ([]archive.Change, error)

	// Diff produces a tarstream which can be applied to a layer with the contents
	// of the first layer to produce a layer with the contents of the second layer.
	// By default, the parent of the second layer is used as the first
	// layer, so it need not be specified. Options can be used to override
	// default behavior, but are also not required.
	Diff(from, to string, options *DiffOptions) (io.ReadCloser, error)

	// DiffSize produces an estimate of the length of the tarstream which would be
	// produced by Diff.
	DiffSize(from, to string) (int64, error)

	// Size produces a cached value for the uncompressed size of the layer,
	// if one is known, or -1 if it is not known. If the layer can not be
	// found, it returns an error.
	Size(name string) (int64, error)

	// LayersByCompressedDigest returns a slice of the layers with the
	// specified compressed digest value recorded for them.
	LayersByCompressedDigest(d digest.Digest) ([]Layer, error)

	// LayersByUncompressedDigest returns a slice of the layers with the
	// specified uncompressed digest value recorded for them.
	LayersByUncompressedDigest(d digest.Digest) ([]Layer, error)

	// LayersByTOCDigest returns a slice of the layers with the
	// specified TOC digest value recorded for them.
	LayersByTOCDigest(d digest.Digest) ([]Layer, error)

	// Layers returns a slice of the known layers.
	Layers() ([]Layer, error)
}
// rwLayerStore wraps a graph driver, adding the ability to refer to layers by
// name, and keeping track of parent-child relationships, along with a list of
// all known layers.
type rwLayerStore interface {
	roLayerStore
	rwMetadataStore
	flaggableStore
	rwLayerBigDataStore

	// startWriting makes sure the store is fresh, and locks it for writing.
	// If this succeeds, the caller MUST call stopWriting().
	startWriting() error

	// stopWriting releases locks obtained by startWriting.
	stopWriting()

	// create creates a new layer, optionally giving it a specified ID rather than
	// a randomly-generated one, either inheriting data from another specified
	// layer or the empty base layer. The new layer can optionally be given names
	// and have an SELinux label specified for use when mounting it. Some
	// underlying drivers can accept a "size" option. At this time, most
	// underlying drivers do not themselves distinguish between writeable
	// and read-only layers. Returns the new layer structure and the size of the
	// diff which was applied to its parent to initialize its contents.
	create(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (*Layer, int64, error)

	// updateNames modifies names associated with a layer based on (op, names).
	updateNames(id string, names []string, op updateNameOperation) error

	// Delete deletes a layer with the specified name or ID.
	Delete(id string) error

	// Wipe deletes all layers.
	Wipe() error

	// Mount mounts a layer for use. If the specified layer is the parent of other
	// layers, it should not be written to. An SELinux label to be applied to the
	// mount can be specified to override the one configured for the layer.
	// The mappings used by the container can be specified.
	Mount(id string, options drivers.MountOpts) (string, error)

	// unmount unmounts a layer when it is no longer in use.
	// If conditional is set, it will fail with ErrLayerNotMounted if the layer is not mounted (without conditional, the caller is
	// making a promise that the layer is actually mounted).
	// If force is set, it will physically try to unmount it even if it is mounted multiple times, or even if (!conditional and)
	// there are no records of it being mounted in the first place.
	// It returns whether the layer was still mounted at the time this function returned.
	// WARNING: The return value may already be obsolete by the time it is available
	// to the caller, so it can be used for heuristic sanity checks at best. It should almost always be ignored.
	unmount(id string, force bool, conditional bool) (bool, error)

	// Mounted returns number of times the layer has been mounted.
	Mounted(id string) (int, error)

	// ParentOwners returns the UIDs and GIDs of parents of the layer's mountpoint
	// for which the layer's UID and GID maps don't contain corresponding entries.
	ParentOwners(id string) (uids, gids []int, err error)

	// ApplyDiff reads a tarstream which was created by a previous call to Diff and
	// applies its changes to a specified layer.
	ApplyDiff(to string, diff io.Reader) (int64, error)

	// applyDiffWithDifferNoLock applies the changes through the differ callback function.
	applyDiffWithDifferNoLock(options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error)

	// CleanupStagingDirectory cleans up the staging directory. It can be used to clean up the staging directory on errors.
	CleanupStagingDirectory(stagingDirectory string) error

	// applyDiffFromStagingDirectory uses diffOutput.Target to create the diff.
	applyDiffFromStagingDirectory(id string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error

	// DifferTarget gets the location where files are stored for the layer.
	DifferTarget(id string) (string, error)

	// PutAdditionalLayer creates a layer using the diff contained in the additional layer
	// store.
	// This API is experimental and can be changed without bumping the major version number.
	PutAdditionalLayer(id string, parentLayer *Layer, names []string, aLayer drivers.AdditionalLayer) (layer *Layer, err error)

	// GarbageCollect cleans up unreferenced layers.
	GarbageCollect() error

	// dedup deduplicates layers in the store.
	dedup(drivers.DedupArgs) (drivers.DedupResult, error)
}
// multipleLockFile aggregates several lockfile.LockFile objects so that they
// can be locked/unlocked as a single unit; operations apply to the locks in
// slice order.
type multipleLockFile struct {
	lockfiles []*lockfile.LockFile
}
func (l multipleLockFile) Lock() {
|
||
for _, lock := range l.lockfiles {
|
||
lock.Lock()
|
||
}
|
||
}
|
||
|
||
func (l multipleLockFile) RLock() {
|
||
for _, lock := range l.lockfiles {
|
||
lock.RLock()
|
||
}
|
||
}
|
||
|
||
func (l multipleLockFile) Unlock() {
|
||
for _, lock := range l.lockfiles {
|
||
lock.Unlock()
|
||
}
|
||
}
|
||
|
||
// ModifiedSince reports whether the store was written to since lastWrite,
// consulting only the first underlying lock.
func (l multipleLockFile) ModifiedSince(lastWrite lockfile.LastWrite) (lockfile.LastWrite, bool, error) {
	// Look up only the first lockfile, since this is the value returned by RecordWrite().
	return l.lockfiles[0].ModifiedSince(lastWrite)
}
func (l multipleLockFile) AssertLockedForWriting() {
|
||
for _, lock := range l.lockfiles {
|
||
lock.AssertLockedForWriting()
|
||
}
|
||
}
|
||
|
||
// GetLastWrite returns the last-write state of the first underlying lock,
// matching ModifiedSince() and RecordWrite(), which also use the first lock.
func (l multipleLockFile) GetLastWrite() (lockfile.LastWrite, error) {
	return l.lockfiles[0].GetLastWrite()
}
func (l multipleLockFile) RecordWrite() (lockfile.LastWrite, error) {
|
||
var lastWrite *lockfile.LastWrite
|
||
for _, lock := range l.lockfiles {
|
||
lw, err := lock.RecordWrite()
|
||
if err != nil {
|
||
return lw, err
|
||
}
|
||
// Return the first value we get so we know that
|
||
// all the locks have a write time >= to this one.
|
||
if lastWrite == nil {
|
||
lastWrite = &lw
|
||
}
|
||
}
|
||
return *lastWrite, nil
|
||
}
|
||
|
||
// IsReadWrite reports whether the first underlying lock is read-write.
// NOTE(review): only the first lock is consulted; presumably all aggregated
// locks share the same read-write mode — confirm at construction sites.
func (l multipleLockFile) IsReadWrite() bool {
	return l.lockfiles[0].IsReadWrite()
}
func newMultipleLockFile(l ...*lockfile.LockFile) *multipleLockFile {
|
||
return &multipleLockFile{lockfiles: l}
|
||
}
|
||
|
||
// layerStore implements roLayerStore/rwLayerStore on top of per-location JSON
// files plus a graph driver.
type layerStore struct {
	// The following fields are only set when constructing layerStore, and must never be modified afterwards.
	// They are safe to access without any other locking.
	lockfile       *multipleLockFile  // lockfile.IsReadWrite can be used to distinguish between read-write and read-only layer stores.
	mountsLockfile *lockfile.LockFile // Can _only_ be obtained with inProcessLock held.
	rundir         string
	// jsonPath holds one path per layer location, indexed by
	// indexFromLayerLocation; an empty entry means that location is unused.
	jsonPath [numLayerLocationIndex]string
	layerdir string

	inProcessLock sync.RWMutex // Can _only_ be obtained with lockfile held.
	// The following fields can only be read/written with read/write ownership of inProcessLock, respectively.
	// Almost all users should use startReading() or startWriting().
	lastWrite           lockfile.LastWrite
	mountsLastWrite     lockfile.LastWrite // Only valid if lockfile.IsReadWrite()
	layers              []*Layer
	idindex             *truncindex.TruncIndex
	byid                map[string]*Layer
	byname              map[string]*Layer
	bymount             map[string]*Layer
	bycompressedsum     map[digest.Digest][]string
	byuncompressedsum   map[digest.Digest][]string
	bytocsum            map[digest.Digest][]string
	layerspathsModified [numLayerLocationIndex]time.Time

	// FIXME: This field is only set when constructing layerStore, but locking rules of the driver
	// interface itself are not documented here.
	driver drivers.Driver
}
func copyLayer(l *Layer) *Layer {
|
||
return &Layer{
|
||
ID: l.ID,
|
||
Names: copySlicePreferringNil(l.Names),
|
||
Parent: l.Parent,
|
||
Metadata: l.Metadata,
|
||
MountLabel: l.MountLabel,
|
||
MountPoint: l.MountPoint,
|
||
MountCount: l.MountCount,
|
||
Created: l.Created,
|
||
CompressedDigest: l.CompressedDigest,
|
||
CompressedSize: l.CompressedSize,
|
||
UncompressedDigest: l.UncompressedDigest,
|
||
UncompressedSize: l.UncompressedSize,
|
||
TOCDigest: l.TOCDigest,
|
||
CompressionType: l.CompressionType,
|
||
ReadOnly: l.ReadOnly,
|
||
location: l.location,
|
||
BigDataNames: copySlicePreferringNil(l.BigDataNames),
|
||
Flags: copyMapPreferringNil(l.Flags),
|
||
UIDMap: copySlicePreferringNil(l.UIDMap),
|
||
GIDMap: copySlicePreferringNil(l.GIDMap),
|
||
UIDs: copySlicePreferringNil(l.UIDs),
|
||
GIDs: copySlicePreferringNil(l.GIDs),
|
||
}
|
||
}
|
||
|
||
// startWritingWithReload makes sure the store is fresh if canReload, and locks it for writing.
// If this succeeds, the caller MUST call stopWriting().
//
// This is an internal implementation detail of layerStore construction, every other caller
// should use startWriting() instead.
func (r *layerStore) startWritingWithReload(canReload bool) error {
	r.lockfile.Lock()
	r.inProcessLock.Lock()
	succeeded := false
	defer func() {
		// On any failure path, release both locks so the caller does not have to.
		if !succeeded {
			r.inProcessLock.Unlock()
			r.lockfile.Unlock()
		}
	}()

	if canReload {
		if _, err := r.reloadIfChanged(true); err != nil {
			return err
		}
	}

	succeeded = true
	return nil
}
// startWriting makes sure the store is fresh, and locks it for writing.
// If this succeeds, the caller MUST call stopWriting().
func (r *layerStore) startWriting() error {
	return r.startWritingWithReload(true)
}
// stopWriting releases locks obtained by startWriting.
func (r *layerStore) stopWriting() {
	// Release in the reverse of the acquisition order in startWritingWithReload.
	r.inProcessLock.Unlock()
	r.lockfile.Unlock()
}
// startReadingWithReload makes sure the store is fresh if canReload, and locks it for reading.
// If this succeeds, the caller MUST call stopReading().
//
// This is an internal implementation detail of layerStore construction, every other caller
// should use startReading() instead.
func (r *layerStore) startReadingWithReload(canReload bool) error {
	// inProcessLocked calls the nested function with r.inProcessLock held for writing.
	inProcessLocked := func(fn func() error) error {
		r.inProcessLock.Lock()
		defer r.inProcessLock.Unlock()
		return fn()
	}

	r.lockfile.RLock()
	unlockFn := r.lockfile.Unlock // A function to call to clean up, or nil
	defer func() {
		if unlockFn != nil {
			unlockFn()
		}
	}()
	r.inProcessLock.RLock()
	unlockFn = r.stopReading

	if canReload {
		// If we are lucky, we can just hold the read locks, check that we are fresh, and continue.
		modified, err := r.modified()
		if err != nil {
			return err
		}
		if modified {
			// We are unlucky, and need to reload.
			// NOTE: Multiple goroutines can get to this place approximately simultaneously.
			r.inProcessLock.RUnlock()
			unlockFn = r.lockfile.Unlock

			cleanupsDone := 0
			for {
				// First try reloading with r.lockfile held for reading.
				// r.inProcessLock will serialize all goroutines that got here;
				// each will re-check on-disk state vs. r.lastWrite, and the first one will actually reload the data.
				var tryLockedForWriting bool
				err := inProcessLocked(func() error {
					var err error
					tryLockedForWriting, err = r.reloadIfChanged(false)
					return err
				})
				if err == nil {
					break
				}
				if !tryLockedForWriting {
					return err
				}
				if cleanupsDone >= maxLayerStoreCleanupIterations {
					return fmt.Errorf("(even after %d cleanup attempts:) %w", cleanupsDone, err)
				}
				// Not good enough, we need r.lockfile held for writing. So, let’s do that.
				unlockFn()
				unlockFn = nil

				r.lockfile.Lock()
				unlockFn = r.lockfile.Unlock
				if err := inProcessLocked(func() error {
					_, err := r.reloadIfChanged(true)
					return err
				}); err != nil {
					return err
				}
				unlockFn()
				unlockFn = nil

				r.lockfile.RLock()
				unlockFn = r.lockfile.Unlock
				// We need to check for a reload again because the on-disk state could have been modified
				// after we released the lock.
				cleanupsDone++
			}

			// NOTE that we hold neither a read nor write inProcessLock at this point. That’s fine in ordinary operation, because
			// the on-filesystem r.lockfile should protect us against (cooperating) writers, and any use of r.inProcessLock
			// protects us against in-process writers modifying data.
			// In presence of non-cooperating writers, we just ensure that 1) the in-memory data is not clearly out-of-date
			// and 2) access to the in-memory data is not racy;
			// but we can’t protect against those out-of-process writers modifying _files_ while we are assuming they are in a consistent state.

			r.inProcessLock.RLock()
		}
	}

	// Success: ownership of the held read locks passes to the caller, who must
	// release them via stopReading(); disarm the deferred cleanup.
	unlockFn = nil
	return nil
}
// startReading makes sure the store is fresh, and locks it for reading.
// If this succeeds, the caller MUST call stopReading().
func (r *layerStore) startReading() error {
	return r.startReadingWithReload(true)
}
// stopReading releases locks obtained by startReading.
func (r *layerStore) stopReading() {
	// Release in the reverse of the acquisition order in startReadingWithReload.
	r.inProcessLock.RUnlock()
	r.lockfile.Unlock()
}
// modified returns true if the on-disk state (of layers or mounts) has changed (ie if reloadIfChanged may need to modify the store)
//
// Note that unlike containerStore.modified and imageStore.modified, this function is not directly used in layerStore.reloadIfChanged();
// it exists only to help the reader ensure it has fresh enough state.
//
// The caller must hold r.lockfile for reading _or_ writing.
// The caller must hold r.inProcessLock for reading or writing.
func (r *layerStore) modified() (bool, error) {
	_, m, err := r.layersModified()
	if err != nil {
		return false, err
	}
	if m {
		return true, nil
	}
	if r.lockfile.IsReadWrite() {
		// This means we get, release, and re-obtain, r.mountsLockfile if we actually need to do any kind of reload.
		// That’s a bit expensive, but hopefully most callers will be read-only and see no changes.
		// We can’t eliminate these mountsLockfile accesses given the current assumption that Layer objects have _some_ not-very-obsolete
		// mount data. Maybe we can segregate the mount-dependent and mount-independent operations better...
		r.mountsLockfile.RLock()
		defer r.mountsLockfile.Unlock()
		_, m, err := r.mountsModified()
		if err != nil {
			return false, err
		}
		if m {
			return true, nil
		}
	}
	return false, nil
}
// layersModified() checks if the most recent writer to r.jsonPath[] was a party other than the
// last recorded writer. If so, it returns a lockfile.LastWrite value to record on a successful
// reload.
// It should only be called with the lock held.
// The caller must hold r.inProcessLock for reading.
func (r *layerStore) layersModified() (lockfile.LastWrite, bool, error) {
	lastWrite, modified, err := r.lockfile.ModifiedSince(r.lastWrite)
	if err != nil {
		return lockfile.LastWrite{}, modified, err
	}
	if modified {
		return lastWrite, true, nil
	}

	// If the layers.json file or container-layers.json has been
	// modified manually, then we have to reload the storage in
	// any case.
	for locationIndex := range numLayerLocationIndex {
		rpath := r.jsonPath[locationIndex]
		if rpath == "" {
			// This location is not in use.
			continue
		}
		info, err := os.Stat(rpath)
		if err != nil && !os.IsNotExist(err) {
			return lockfile.LastWrite{}, false, fmt.Errorf("stat layers file: %w", err)
		}
		if info != nil && info.ModTime() != r.layerspathsModified[locationIndex] {
			// In this case the LastWrite value is equal to r.lastWrite; writing it back doesn’t hurt.
			return lastWrite, true, nil
		}
	}

	return lockfile.LastWrite{}, false, nil
}
// reloadIfChanged reloads the contents of the store from disk if it is changed.
//
// The caller must hold r.lockfile for reading _or_ writing; lockedForWriting is true
// if it is held for writing.
//
// The caller must hold r.inProcessLock for WRITING.
//
// If !lockedForWriting and this function fails, the return value indicates whether
// reloadIfChanged() with lockedForWriting could succeed.
func (r *layerStore) reloadIfChanged(lockedForWriting bool) (bool, error) {
	lastWrite, layersModified, err := r.layersModified()
	if err != nil {
		return false, err
	}
	if layersModified {
		// r.load also reloads mounts data; so, on this path, we don’t need to call reloadMountsIfChanged.
		if tryLockedForWriting, err := r.load(lockedForWriting); err != nil {
			return tryLockedForWriting, err // r.lastWrite is unchanged, so we will load the next time again.
		}
		r.lastWrite = lastWrite
		return false, nil
	}
	if r.lockfile.IsReadWrite() {
		r.mountsLockfile.RLock()
		defer r.mountsLockfile.Unlock()
		if err := r.reloadMountsIfChanged(); err != nil {
			return false, err
		}
	}
	return false, nil
}
// mountsModified returns true if the on-disk mount state has changed (i.e. if reloadMountsIfChanged may need to modify the store),
// and a lockfile.LastWrite value for that update.
//
// The caller must hold r.mountsLockfile for reading _or_ writing.
// The caller must hold r.inProcessLock for reading or writing.
func (r *layerStore) mountsModified() (lockfile.LastWrite, bool, error) {
	return r.mountsLockfile.ModifiedSince(r.mountsLastWrite)
}
// reloadMountsIfChanged reloads the contents of mountsPath from disk if it is changed.
|
||
//
|
||
// The caller must hold r.mountsLockFile for reading or writing.
|
||
func (r *layerStore) reloadMountsIfChanged() error {
|
||
lastWrite, modified, err := r.mountsModified()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if modified {
|
||
if err = r.loadMounts(); err != nil {
|
||
return err
|
||
}
|
||
r.mountsLastWrite = lastWrite
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) Layers() ([]Layer, error) {
|
||
layers := make([]Layer, len(r.layers))
|
||
for i := range r.layers {
|
||
layers[i] = *copyLayer(r.layers[i])
|
||
}
|
||
return layers, nil
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) GarbageCollect() error {
|
||
layers, err := r.driver.ListLayers()
|
||
if err != nil {
|
||
if errors.Is(err, drivers.ErrNotSupported) {
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
|
||
for _, id := range layers {
|
||
// Is the id still referenced
|
||
if r.byid[id] != nil {
|
||
continue
|
||
}
|
||
|
||
// Remove layer and any related data of unreferenced id
|
||
if err := r.driver.Remove(id); err != nil {
|
||
logrus.Debugf("removing driver layer %q", id)
|
||
return err
|
||
}
|
||
|
||
logrus.Debugf("removing %q", r.tspath(id))
|
||
os.Remove(r.tspath(id))
|
||
logrus.Debugf("removing %q", r.datadir(id))
|
||
os.RemoveAll(r.datadir(id))
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (r *layerStore) mountspath() string {
|
||
return filepath.Join(r.rundir, "mountpoints.json")
|
||
}
|
||
|
||
// load reloads the contents of the store from disk.
|
||
//
|
||
// Most callers should call reloadIfChanged() instead, to avoid overhead and to correctly
|
||
// manage r.lastWrite.
|
||
//
|
||
// As a side effect, this sets r.mountsLastWrite.
|
||
//
|
||
// The caller must hold r.lockfile for reading _or_ writing; lockedForWriting is true
|
||
// if it is held for writing.
|
||
// The caller must hold r.inProcessLock for WRITING.
|
||
//
|
||
// If !lockedForWriting and this function fails, the return value indicates whether
|
||
// retrying with lockedForWriting could succeed.
|
||
func (r *layerStore) load(lockedForWriting bool) (bool, error) {
|
||
var modifiedLocations layerLocations
|
||
|
||
layers := []*Layer{}
|
||
ids := make(map[string]*Layer)
|
||
|
||
for locationIndex := range numLayerLocationIndex {
|
||
location := layerLocationFromIndex(locationIndex)
|
||
rpath := r.jsonPath[locationIndex]
|
||
if rpath == "" {
|
||
continue
|
||
}
|
||
info, err := os.Stat(rpath)
|
||
if err != nil {
|
||
if !os.IsNotExist(err) {
|
||
return false, err
|
||
}
|
||
} else {
|
||
r.layerspathsModified[locationIndex] = info.ModTime()
|
||
}
|
||
data, err := os.ReadFile(rpath)
|
||
if err != nil && !os.IsNotExist(err) {
|
||
return false, err
|
||
}
|
||
|
||
locationLayers := []*Layer{}
|
||
if len(data) != 0 {
|
||
if err := json.Unmarshal(data, &locationLayers); err != nil {
|
||
return false, fmt.Errorf("loading %q: %w", rpath, err)
|
||
}
|
||
}
|
||
|
||
for _, layer := range locationLayers {
|
||
// There should be no duplicated ids between json files, but lets check to be sure
|
||
if ids[layer.ID] != nil {
|
||
continue // skip invalid duplicated layer
|
||
}
|
||
// Remember where the layer came from
|
||
layer.location = location
|
||
layers = append(layers, layer)
|
||
ids[layer.ID] = layer
|
||
}
|
||
}
|
||
|
||
idlist := make([]string, 0, len(layers))
|
||
names := make(map[string]*Layer)
|
||
compressedsums := make(map[digest.Digest][]string)
|
||
uncompressedsums := make(map[digest.Digest][]string)
|
||
tocsums := make(map[digest.Digest][]string)
|
||
var errorToResolveBySaving error // == nil; if there are multiple errors, this is one of them.
|
||
if r.lockfile.IsReadWrite() {
|
||
selinux.ClearLabels()
|
||
}
|
||
for n, layer := range layers {
|
||
idlist = append(idlist, layer.ID)
|
||
for _, name := range layer.Names {
|
||
if conflict, ok := names[name]; ok {
|
||
r.removeName(conflict, name)
|
||
errorToResolveBySaving = ErrDuplicateLayerNames
|
||
modifiedLocations |= conflict.location
|
||
}
|
||
names[name] = layers[n]
|
||
}
|
||
if layer.CompressedDigest != "" {
|
||
compressedsums[layer.CompressedDigest] = append(compressedsums[layer.CompressedDigest], layer.ID)
|
||
}
|
||
if layer.UncompressedDigest != "" {
|
||
uncompressedsums[layer.UncompressedDigest] = append(uncompressedsums[layer.UncompressedDigest], layer.ID)
|
||
}
|
||
if layer.TOCDigest != "" {
|
||
tocsums[layer.TOCDigest] = append(tocsums[layer.TOCDigest], layer.ID)
|
||
}
|
||
if layer.MountLabel != "" {
|
||
selinux.ReserveLabel(layer.MountLabel)
|
||
}
|
||
layer.ReadOnly = !r.lockfile.IsReadWrite()
|
||
// The r.lockfile.IsReadWrite() condition maintains past practice:
|
||
// Incomplete layers in a read-only store are not treated as a reason to refuse to use other layers from that store
|
||
// (OTOH creating child layers on top would probably lead to problems?).
|
||
// We do remove incomplete layers in read-write stores so that we don’t build on top of them.
|
||
if layerHasIncompleteFlag(layer) && r.lockfile.IsReadWrite() {
|
||
errorToResolveBySaving = errors.New("an incomplete layer exists and can't be cleaned up")
|
||
}
|
||
}
|
||
|
||
if errorToResolveBySaving != nil {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return false, errorToResolveBySaving
|
||
}
|
||
if !lockedForWriting {
|
||
return true, errorToResolveBySaving
|
||
}
|
||
}
|
||
r.layers = layers
|
||
r.idindex = truncindex.NewTruncIndex(idlist) // Invalid values in idlist are ignored: they are not a reason to refuse processing the whole store.
|
||
r.byid = ids
|
||
r.byname = names
|
||
r.bycompressedsum = compressedsums
|
||
r.byuncompressedsum = uncompressedsums
|
||
r.bytocsum = tocsums
|
||
|
||
// Load and merge information about which layers are mounted, and where.
|
||
if r.lockfile.IsReadWrite() {
|
||
r.mountsLockfile.RLock()
|
||
defer r.mountsLockfile.Unlock()
|
||
// We need to reload mounts unconditionally, becuause by creating r.layers from scratch, we have discarded the previous
|
||
// information, if any. So, obtain a fresh mountsLastWrite value so that we don’t unnecessarily reload the data
|
||
// afterwards.
|
||
mountsLastWrite, err := r.mountsLockfile.GetLastWrite()
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
if err := r.loadMounts(); err != nil {
|
||
return false, err
|
||
}
|
||
r.mountsLastWrite = mountsLastWrite
|
||
// NOTE: We will release mountsLockfile when this function returns, so unlike most of the layer data, the
|
||
// r.layers[].MountPoint, r.layers[].MountCount, and r.bymount values might not reflect
|
||
// true on-filesystem state already by the time this function returns.
|
||
// Code that needs the state to be accurate must lock r.mountsLockfile again,
|
||
// and possibly loadMounts() again.
|
||
}
|
||
|
||
if errorToResolveBySaving != nil {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return false, fmt.Errorf("internal error: layerStore.load has shouldSave but !r.lockfile.IsReadWrite")
|
||
}
|
||
// Last step: try to remove anything that a previous
|
||
// user of this storage area marked for deletion but didn't manage to
|
||
// actually delete.
|
||
var incompleteDeletionErrors error // = nil
|
||
var layersToDelete []*Layer
|
||
for _, layer := range r.layers {
|
||
if layer.Flags == nil {
|
||
layer.Flags = make(map[string]any)
|
||
}
|
||
if layerHasIncompleteFlag(layer) {
|
||
// Important: Do not call r.deleteInternal() here. It modifies r.layers
|
||
// which causes unexpected side effects while iterating over r.layers here.
|
||
// The range loop has no idea that the underlying elements where shifted
|
||
// around.
|
||
layersToDelete = append(layersToDelete, layer)
|
||
}
|
||
}
|
||
// Now actually delete the layers
|
||
for _, layer := range layersToDelete {
|
||
logrus.Warnf("Found incomplete layer %q, deleting it", layer.ID)
|
||
err := r.deleteInternal(layer.ID)
|
||
if err != nil {
|
||
// Don't return the error immediately, because deleteInternal does not saveLayers();
|
||
// Even if deleting one incomplete layer fails, call saveLayers() so that other possible successfully
|
||
// deleted incomplete layers have their metadata correctly removed.
|
||
incompleteDeletionErrors = errors.Join(incompleteDeletionErrors,
|
||
fmt.Errorf("deleting layer %#v: %w", layer.ID, err))
|
||
}
|
||
modifiedLocations |= layer.location
|
||
}
|
||
if err := r.saveLayers(modifiedLocations); err != nil {
|
||
return false, err
|
||
}
|
||
if incompleteDeletionErrors != nil {
|
||
return false, incompleteDeletionErrors
|
||
}
|
||
}
|
||
return false, nil
|
||
}
|
||
|
||
// The caller must hold r.mountsLockfile for reading or writing.
|
||
// The caller must hold r.inProcessLock for WRITING.
|
||
func (r *layerStore) loadMounts() error {
|
||
mounts := make(map[string]*Layer)
|
||
mpath := r.mountspath()
|
||
data, err := os.ReadFile(mpath)
|
||
if err != nil && !os.IsNotExist(err) {
|
||
return err
|
||
}
|
||
layerMounts := []layerMountPoint{}
|
||
if len(data) != 0 {
|
||
if err := json.Unmarshal(data, &layerMounts); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
// Clear all of our mount information. If another process
|
||
// unmounted something, it (along with its zero count) won't
|
||
// have been encoded into the version of mountpoints.json that
|
||
// we're loading, so our count could fall out of sync with it
|
||
// if we don't, and if we subsequently change something else,
|
||
// we'd pass that error along to other process that reloaded
|
||
// the data after we saved it.
|
||
for _, layer := range r.layers {
|
||
layer.MountPoint = ""
|
||
layer.MountCount = 0
|
||
}
|
||
// All of the non-zero count values will have been encoded, so
|
||
// we reset the still-mounted ones based on the contents.
|
||
for _, mount := range layerMounts {
|
||
if mount.MountPoint != "" {
|
||
if layer, ok := r.lookup(mount.ID); ok {
|
||
mounts[mount.MountPoint] = layer
|
||
layer.MountPoint = mount.MountPoint
|
||
layer.MountCount = mount.MountCount
|
||
}
|
||
}
|
||
}
|
||
r.bymount = mounts
|
||
return nil
|
||
}
|
||
|
||
// save saves the contents of the store to disk.
|
||
// The caller must hold r.lockfile locked for writing.
|
||
// The caller must hold r.inProcessLock for WRITING.
|
||
func (r *layerStore) save(saveLocations layerLocations) error {
|
||
r.mountsLockfile.Lock()
|
||
defer r.mountsLockfile.Unlock()
|
||
if err := r.saveLayers(saveLocations); err != nil {
|
||
return err
|
||
}
|
||
return r.saveMounts()
|
||
}
|
||
|
||
// saveFor saves the contents of the store relevant for modifiedLayer to disk.
|
||
// The caller must hold r.lockfile locked for writing.
|
||
// The caller must hold r.inProcessLock for WRITING.
|
||
func (r *layerStore) saveFor(modifiedLayer *Layer) error {
|
||
return r.save(modifiedLayer.location)
|
||
}
|
||
|
||
// The caller must hold r.lockfile locked for writing.
|
||
// The caller must hold r.inProcessLock for WRITING.
|
||
func (r *layerStore) saveLayers(saveLocations layerLocations) error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to modify the layer store at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
r.lockfile.AssertLockedForWriting()
|
||
|
||
// This must be done before we write the file, because the process could be terminated
|
||
// after the file is written but before the lock file is updated.
|
||
lw, err := r.lockfile.RecordWrite()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
r.lastWrite = lw
|
||
|
||
for locationIndex := range numLayerLocationIndex {
|
||
location := layerLocationFromIndex(locationIndex)
|
||
if location&saveLocations == 0 {
|
||
continue
|
||
}
|
||
rpath := r.jsonPath[locationIndex]
|
||
if rpath == "" {
|
||
return fmt.Errorf("internal error: no path for location %v", location)
|
||
}
|
||
if err := os.MkdirAll(filepath.Dir(rpath), 0o700); err != nil {
|
||
return err
|
||
}
|
||
subsetLayers := make([]*Layer, 0, len(r.layers))
|
||
for _, layer := range r.layers {
|
||
if layer.location == location {
|
||
subsetLayers = append(subsetLayers, layer)
|
||
}
|
||
}
|
||
|
||
jldata, err := json.Marshal(&subsetLayers)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
opts := ioutils.AtomicFileWriterOptions{}
|
||
if location == volatileLayerLocation {
|
||
opts.NoSync = true
|
||
}
|
||
if err := ioutils.AtomicWriteFileWithOpts(rpath, jldata, 0o600, &opts); err != nil {
|
||
return err
|
||
}
|
||
r.layerspathsModified[locationIndex] = opts.ModTime
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// The caller must hold r.mountsLockfile for writing.
|
||
// The caller must hold r.inProcessLock for WRITING.
|
||
func (r *layerStore) saveMounts() error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to modify the layer store at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
r.mountsLockfile.AssertLockedForWriting()
|
||
mpath := r.mountspath()
|
||
if err := os.MkdirAll(filepath.Dir(mpath), 0o700); err != nil {
|
||
return err
|
||
}
|
||
mounts := make([]layerMountPoint, 0, len(r.layers))
|
||
for _, layer := range r.layers {
|
||
if layer.MountPoint != "" && layer.MountCount > 0 {
|
||
mounts = append(mounts, layerMountPoint{
|
||
ID: layer.ID,
|
||
MountPoint: layer.MountPoint,
|
||
MountCount: layer.MountCount,
|
||
})
|
||
}
|
||
}
|
||
jmdata, err := json.Marshal(&mounts)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
// This must be done before we write the file, because the process could be terminated
|
||
// after the file is written but before the lock file is updated.
|
||
lw, err := r.mountsLockfile.RecordWrite()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
r.mountsLastWrite = lw
|
||
|
||
if err = ioutils.AtomicWriteFile(mpath, jmdata, 0o600); err != nil {
|
||
return err
|
||
}
|
||
return r.loadMounts()
|
||
}
|
||
|
||
func (s *store) newLayerStore(rundir, layerdir, imagedir string, driver drivers.Driver, transient bool) (rwLayerStore, error) {
|
||
if err := os.MkdirAll(rundir, 0o700); err != nil {
|
||
return nil, err
|
||
}
|
||
if err := os.MkdirAll(layerdir, 0o700); err != nil {
|
||
return nil, err
|
||
}
|
||
if imagedir != "" {
|
||
if err := os.MkdirAll(imagedir, 0o700); err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
// Note: While the containers.lock file is in rundir for transient stores
|
||
// we don't want to do this here, because the non-transient layers in
|
||
// layers.json might be used externally as a read-only layer (using e.g.
|
||
// additionalimagestores), and that would look for the lockfile in the
|
||
// same directory
|
||
var lockFiles []*lockfile.LockFile
|
||
lockFile, err := lockfile.GetLockFile(filepath.Join(layerdir, "layers.lock"))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
lockFiles = append(lockFiles, lockFile)
|
||
if imagedir != "" {
|
||
lockFile, err := lockfile.GetLockFile(filepath.Join(imagedir, "layers.lock"))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
lockFiles = append(lockFiles, lockFile)
|
||
}
|
||
|
||
mountsLockfile, err := lockfile.GetLockFile(filepath.Join(rundir, "mountpoints.lock"))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
volatileDir := layerdir
|
||
if transient {
|
||
volatileDir = rundir
|
||
}
|
||
layersImageDir := ""
|
||
if imagedir != "" {
|
||
layersImageDir = filepath.Join(imagedir, "layers.json")
|
||
}
|
||
rlstore := layerStore{
|
||
lockfile: newMultipleLockFile(lockFiles...),
|
||
mountsLockfile: mountsLockfile,
|
||
rundir: rundir,
|
||
jsonPath: [numLayerLocationIndex]string{
|
||
filepath.Join(layerdir, "layers.json"),
|
||
layersImageDir,
|
||
filepath.Join(volatileDir, "volatile-layers.json"),
|
||
},
|
||
layerdir: layerdir,
|
||
|
||
byid: make(map[string]*Layer),
|
||
byname: make(map[string]*Layer),
|
||
bymount: make(map[string]*Layer),
|
||
|
||
driver: driver,
|
||
}
|
||
if err := rlstore.startWritingWithReload(false); err != nil {
|
||
return nil, err
|
||
}
|
||
defer rlstore.stopWriting()
|
||
lw, err := rlstore.lockfile.GetLastWrite()
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
rlstore.lastWrite = lw
|
||
// rlstore.mountsLastWrite is initialized inside rlstore.load().
|
||
if _, err := rlstore.load(true); err != nil {
|
||
return nil, err
|
||
}
|
||
return &rlstore, nil
|
||
}
|
||
|
||
func newROLayerStore(rundir string, layerdir string, driver drivers.Driver) (roLayerStore, error) {
|
||
lockfile, err := lockfile.GetROLockFile(filepath.Join(layerdir, "layers.lock"))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
rlstore := layerStore{
|
||
lockfile: newMultipleLockFile(lockfile),
|
||
mountsLockfile: nil,
|
||
rundir: rundir,
|
||
jsonPath: [numLayerLocationIndex]string{
|
||
filepath.Join(layerdir, "layers.json"),
|
||
"",
|
||
filepath.Join(layerdir, "volatile-layers.json"),
|
||
},
|
||
layerdir: layerdir,
|
||
|
||
byid: make(map[string]*Layer),
|
||
byname: make(map[string]*Layer),
|
||
bymount: make(map[string]*Layer),
|
||
|
||
driver: driver,
|
||
}
|
||
if err := rlstore.startReadingWithReload(false); err != nil {
|
||
return nil, err
|
||
}
|
||
defer rlstore.stopReading()
|
||
lw, err := rlstore.lockfile.GetLastWrite()
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
rlstore.lastWrite = lw
|
||
if _, err := rlstore.load(false); err != nil {
|
||
return nil, err
|
||
}
|
||
return &rlstore, nil
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) lookup(id string) (*Layer, bool) {
|
||
if layer, ok := r.byid[id]; ok {
|
||
return layer, ok
|
||
} else if layer, ok := r.byname[id]; ok {
|
||
return layer, ok
|
||
} else if longid, err := r.idindex.Get(id); err == nil {
|
||
layer, ok := r.byid[longid]
|
||
return layer, ok
|
||
}
|
||
return nil, false
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) Size(name string) (int64, error) {
|
||
layer, ok := r.lookup(name)
|
||
if !ok {
|
||
return -1, ErrLayerUnknown
|
||
}
|
||
// We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that
|
||
// a zero value is not just present because it was never set to anything else (which can happen if the layer was
|
||
// created by a version of this library that didn't keep track of digest and size information).
|
||
if layer.UncompressedDigest != "" || layer.TOCDigest != "" {
|
||
return layer.UncompressedSize, nil // This may return -1 if only TOCDigest is set
|
||
}
|
||
return -1, nil
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) ClearFlag(id string, flag string) error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to clear flags on layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return ErrLayerUnknown
|
||
}
|
||
delete(layer.Flags, flag)
|
||
return r.saveFor(layer)
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) SetFlag(id string, flag string, value any) error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to set flags on layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return ErrLayerUnknown
|
||
}
|
||
if layer.Flags == nil {
|
||
layer.Flags = make(map[string]any)
|
||
}
|
||
layer.Flags[flag] = value
|
||
return r.saveFor(layer)
|
||
}
|
||
|
||
func (r *layerStore) Status() ([][2]string, error) {
|
||
return r.driver.Status(), nil
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []string, aLayer drivers.AdditionalLayer) (layer *Layer, err error) {
|
||
if duplicateLayer, idInUse := r.byid[id]; idInUse {
|
||
return duplicateLayer, ErrDuplicateID
|
||
}
|
||
for _, name := range names {
|
||
if _, nameInUse := r.byname[name]; nameInUse {
|
||
return nil, ErrDuplicateName
|
||
}
|
||
}
|
||
|
||
parent := ""
|
||
if parentLayer != nil {
|
||
parent = parentLayer.ID
|
||
}
|
||
|
||
info, err := aLayer.Info()
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer info.Close()
|
||
layer = &Layer{}
|
||
if err := json.NewDecoder(info).Decode(layer); err != nil {
|
||
return nil, err
|
||
}
|
||
layer.ID = id
|
||
layer.Parent = parent
|
||
layer.Created = time.Now().UTC()
|
||
|
||
if err := aLayer.CreateAs(id, parent); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// TODO: check if necessary fields are filled
|
||
r.layers = append(r.layers, layer)
|
||
// This can only fail on duplicate IDs, which shouldn’t happen — and in
|
||
// that case the index is already in the desired state anyway.
|
||
// Implementing recovery from an unlikely and unimportant failure here
|
||
// would be too risky.
|
||
_ = r.idindex.Add(id)
|
||
r.byid[id] = layer
|
||
for _, name := range names { // names got from the additional layer store won't be used
|
||
r.byname[name] = layer
|
||
}
|
||
if layer.CompressedDigest != "" {
|
||
r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID)
|
||
}
|
||
if layer.UncompressedDigest != "" {
|
||
r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID)
|
||
}
|
||
if layer.TOCDigest != "" {
|
||
r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID)
|
||
}
|
||
if err := r.saveFor(layer); err != nil {
|
||
if e := r.Delete(layer.ID); e != nil {
|
||
logrus.Errorf("While recovering from a failure to save layers, error deleting layer %#v: %v", id, e)
|
||
}
|
||
return nil, err
|
||
}
|
||
return copyLayer(layer), nil
|
||
}
|
||
|
||
func (r *layerStore) pickStoreLocation(volatile, writeable bool) layerLocations {
|
||
switch {
|
||
case volatile:
|
||
return volatileLayerLocation
|
||
case !writeable && r.jsonPath[indexFromLayerLocation(imageStoreLayerLocation)] != "":
|
||
return imageStoreLayerLocation
|
||
default:
|
||
return stableLayerLocation
|
||
}
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) create(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (layer *Layer, size int64, err error) {
|
||
if moreOptions == nil {
|
||
moreOptions = &LayerOptions{}
|
||
}
|
||
if !r.lockfile.IsReadWrite() {
|
||
return nil, -1, fmt.Errorf("not allowed to create new layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
if err := os.MkdirAll(r.rundir, 0o700); err != nil {
|
||
return nil, -1, err
|
||
}
|
||
if err := os.MkdirAll(r.layerdir, 0o700); err != nil {
|
||
return nil, -1, err
|
||
}
|
||
if id == "" {
|
||
id = stringid.GenerateRandomID()
|
||
_, idInUse := r.byid[id]
|
||
for idInUse {
|
||
id = stringid.GenerateRandomID()
|
||
_, idInUse = r.byid[id]
|
||
}
|
||
}
|
||
if duplicateLayer, idInUse := r.byid[id]; idInUse {
|
||
return duplicateLayer, -1, ErrDuplicateID
|
||
}
|
||
names = dedupeStrings(names)
|
||
for _, name := range names {
|
||
if _, nameInUse := r.byname[name]; nameInUse {
|
||
return nil, -1, ErrDuplicateName
|
||
}
|
||
}
|
||
parent := ""
|
||
if parentLayer != nil {
|
||
parent = parentLayer.ID
|
||
}
|
||
var (
|
||
templateIDMappings *idtools.IDMappings
|
||
templateMetadata string
|
||
templateCompressedDigest digest.Digest
|
||
templateCompressedSize int64
|
||
templateUncompressedDigest digest.Digest
|
||
templateTOCDigest digest.Digest
|
||
templateUncompressedSize int64
|
||
templateCompressionType archive.Compression
|
||
templateUIDs, templateGIDs []uint32
|
||
templateTSdata []byte
|
||
)
|
||
if moreOptions.TemplateLayer != "" {
|
||
templateLayer, ok := r.lookup(moreOptions.TemplateLayer)
|
||
if !ok {
|
||
return nil, -1, ErrLayerUnknown
|
||
}
|
||
templateMetadata = templateLayer.Metadata
|
||
templateIDMappings = idtools.NewIDMappingsFromMaps(templateLayer.UIDMap, templateLayer.GIDMap)
|
||
templateTOCDigest = templateLayer.TOCDigest
|
||
templateCompressedDigest, templateCompressedSize = templateLayer.CompressedDigest, templateLayer.CompressedSize
|
||
templateUncompressedDigest, templateUncompressedSize = templateLayer.UncompressedDigest, templateLayer.UncompressedSize
|
||
templateCompressionType = templateLayer.CompressionType
|
||
templateUIDs, templateGIDs = slices.Clone(templateLayer.UIDs), slices.Clone(templateLayer.GIDs)
|
||
templateTSdata, err = os.ReadFile(r.tspath(templateLayer.ID))
|
||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||
return nil, -1, err
|
||
}
|
||
} else {
|
||
templateIDMappings = &idtools.IDMappings{}
|
||
}
|
||
if mountLabel != "" {
|
||
selinux.ReserveLabel(mountLabel)
|
||
}
|
||
|
||
// Before actually creating the layer, make a persistent record of it
|
||
// with the incomplete flag set, so that future processes have a chance
|
||
// to clean up after it.
|
||
layer = &Layer{
|
||
ID: id,
|
||
Parent: parent,
|
||
Names: names,
|
||
MountLabel: mountLabel,
|
||
Metadata: templateMetadata,
|
||
Created: time.Now().UTC(),
|
||
CompressedDigest: templateCompressedDigest,
|
||
CompressedSize: templateCompressedSize,
|
||
UncompressedDigest: templateUncompressedDigest,
|
||
TOCDigest: templateTOCDigest,
|
||
UncompressedSize: templateUncompressedSize,
|
||
CompressionType: templateCompressionType,
|
||
UIDs: templateUIDs,
|
||
GIDs: templateGIDs,
|
||
Flags: newMapFrom(moreOptions.Flags),
|
||
UIDMap: copySlicePreferringNil(moreOptions.UIDMap),
|
||
GIDMap: copySlicePreferringNil(moreOptions.GIDMap),
|
||
BigDataNames: []string{},
|
||
location: r.pickStoreLocation(moreOptions.Volatile, writeable),
|
||
}
|
||
layer.Flags[incompleteFlag] = true
|
||
|
||
r.layers = append(r.layers, layer)
|
||
// This can only fail if the ID is already missing, which shouldn’t
|
||
// happen — and in that case the index is already in the desired state
|
||
// anyway. This is on various paths to recover from failures, so this
|
||
// should be robust against partially missing data.
|
||
_ = r.idindex.Add(id)
|
||
r.byid[id] = layer
|
||
for _, name := range names {
|
||
r.byname[name] = layer
|
||
}
|
||
|
||
cleanupFailureContext := ""
|
||
defer func() {
|
||
if err != nil {
|
||
// now that the in-memory structures know about the new
|
||
// record, we can use regular Delete() to clean up if
|
||
// anything breaks from here on out
|
||
if cleanupFailureContext == "" {
|
||
cleanupFailureContext = "unknown: cleanupFailureContext not set at the failure site"
|
||
}
|
||
if e := r.Delete(id); e != nil {
|
||
logrus.Errorf("While recovering from a failure (%s), error deleting layer %#v: %v", cleanupFailureContext, id, e)
|
||
}
|
||
}
|
||
}()
|
||
|
||
if err = r.saveFor(layer); err != nil {
|
||
cleanupFailureContext = "saving incomplete layer metadata"
|
||
return nil, -1, err
|
||
}
|
||
|
||
for _, item := range moreOptions.BigData {
|
||
if err = r.setBigData(layer, item.Key, item.Data); err != nil {
|
||
cleanupFailureContext = fmt.Sprintf("saving big data item %q", item.Key)
|
||
return nil, -1, err
|
||
}
|
||
}
|
||
|
||
idMappings := idtools.NewIDMappingsFromMaps(moreOptions.UIDMap, moreOptions.GIDMap)
|
||
opts := drivers.CreateOpts{
|
||
MountLabel: mountLabel,
|
||
StorageOpt: options,
|
||
IDMappings: idMappings,
|
||
}
|
||
|
||
var parentMappings, oldMappings *idtools.IDMappings
|
||
if parentLayer != nil {
|
||
parentMappings = idtools.NewIDMappingsFromMaps(parentLayer.UIDMap, parentLayer.GIDMap)
|
||
} else {
|
||
parentMappings = &idtools.IDMappings{}
|
||
}
|
||
if moreOptions.TemplateLayer != "" {
|
||
if err = r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil {
|
||
cleanupFailureContext = fmt.Sprintf("creating a layer from template layer %q", moreOptions.TemplateLayer)
|
||
return nil, -1, fmt.Errorf("creating copy of template layer %q with ID %q: %w", moreOptions.TemplateLayer, id, err)
|
||
}
|
||
oldMappings = templateIDMappings
|
||
} else {
|
||
if writeable {
|
||
if err = r.driver.CreateReadWrite(id, parent, &opts); err != nil {
|
||
cleanupFailureContext = "creating a read-write layer"
|
||
return nil, -1, fmt.Errorf("creating read-write layer with ID %q: %w", id, err)
|
||
}
|
||
} else {
|
||
if err = r.driver.Create(id, parent, &opts); err != nil {
|
||
cleanupFailureContext = "creating a read-only layer"
|
||
return nil, -1, fmt.Errorf("creating read-only layer with ID %q: %w", id, err)
|
||
}
|
||
}
|
||
if parentLayer != nil {
|
||
oldMappings = parentMappings
|
||
}
|
||
}
|
||
|
||
if oldMappings != nil &&
|
||
(!reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs())) {
|
||
if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil {
|
||
cleanupFailureContext = "in UpdateLayerIDMap"
|
||
return nil, -1, err
|
||
}
|
||
}
|
||
|
||
if len(templateTSdata) > 0 {
|
||
if err = os.MkdirAll(filepath.Dir(r.tspath(id)), 0o700); err != nil {
|
||
cleanupFailureContext = "creating tar-split parent directory for a copy from template"
|
||
return nil, -1, err
|
||
}
|
||
if err = ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil {
|
||
cleanupFailureContext = "creating a tar-split copy from template"
|
||
return nil, -1, err
|
||
}
|
||
}
|
||
|
||
size = -1
|
||
if diff != nil {
|
||
if size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff); err != nil {
|
||
cleanupFailureContext = "applying layer diff"
|
||
return nil, -1, err
|
||
}
|
||
} else if slo != nil {
|
||
if err := r.applyDiffFromStagingDirectory(layer.ID, slo.DiffOutput, slo.DiffOptions); err != nil {
|
||
cleanupFailureContext = "applying staged directory diff"
|
||
return nil, -1, err
|
||
}
|
||
} else {
|
||
// applyDiffWithOptions() would have updated r.bycompressedsum
|
||
// and r.byuncompressedsum for us, but if we used a template
|
||
// layer, we didn't call it, so add the new layer as candidates
|
||
// for searches for layers by checksum
|
||
if layer.CompressedDigest != "" {
|
||
r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID)
|
||
}
|
||
if layer.UncompressedDigest != "" {
|
||
r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID)
|
||
}
|
||
if layer.TOCDigest != "" {
|
||
r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID)
|
||
}
|
||
}
|
||
|
||
delete(layer.Flags, incompleteFlag)
|
||
if err = r.saveFor(layer); err != nil {
|
||
cleanupFailureContext = "saving finished layer metadata"
|
||
return nil, -1, err
|
||
}
|
||
|
||
layer = copyLayer(layer)
|
||
return layer, size, err
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) Mounted(id string) (int, error) {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return 0, fmt.Errorf("no mount information for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly)
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return 0, ErrLayerUnknown
|
||
}
|
||
// NOTE: The caller of this function is not holding (currently cannot hold) r.mountsLockfile,
|
||
// so the data is necessarily obsolete by the time this function returns. So, we don’t even
|
||
// try to reload it in this function, we just rely on r.load() that happened during
|
||
// r.startReading() or r.startWriting().
|
||
return layer.MountCount, nil
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) Mount(id string, options drivers.MountOpts) (string, error) {
|
||
// LOCKING BUG: This is reachable via store.Diff → layerStore.Diff → layerStore.newFileGetter
|
||
// (with btrfs and zfs graph drivers) holding layerStore only locked for reading, while it modifies
|
||
// - r.layers[].MountCount (directly and via loadMounts / saveMounts)
|
||
// - r.layers[].MountPoint (directly and via loadMounts / saveMounts)
|
||
// - r.bymount (via loadMounts / saveMounts)
|
||
|
||
// You are not allowed to mount layers from readonly stores if they
|
||
// are not mounted read/only.
|
||
if !r.lockfile.IsReadWrite() && !slices.Contains(options.Options, "ro") {
|
||
return "", fmt.Errorf("not allowed to update mount locations for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly)
|
||
}
|
||
r.mountsLockfile.Lock()
|
||
defer r.mountsLockfile.Unlock()
|
||
if err := r.reloadMountsIfChanged(); err != nil {
|
||
return "", err
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return "", ErrLayerUnknown
|
||
}
|
||
if layer.MountCount > 0 {
|
||
mounted, err := mount.Mounted(layer.MountPoint)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
// If the container is not mounted then we have a condition
|
||
// where the kernel umounted the mount point. This means
|
||
// that the mount count never got decremented.
|
||
if mounted {
|
||
layer.MountCount++
|
||
return layer.MountPoint, r.saveMounts()
|
||
}
|
||
}
|
||
if options.MountLabel == "" {
|
||
options.MountLabel = layer.MountLabel
|
||
}
|
||
|
||
if (options.UidMaps != nil || options.GidMaps != nil) && !r.driver.SupportsShifting() {
|
||
if !reflect.DeepEqual(options.UidMaps, layer.UIDMap) || !reflect.DeepEqual(options.GidMaps, layer.GIDMap) {
|
||
return "", fmt.Errorf("cannot mount layer %v: shifting not enabled", layer.ID)
|
||
}
|
||
}
|
||
mountpoint, err := r.driver.Get(id, options)
|
||
if mountpoint != "" && err == nil {
|
||
if layer.MountPoint != "" {
|
||
delete(r.bymount, layer.MountPoint)
|
||
}
|
||
layer.MountPoint = filepath.Clean(mountpoint)
|
||
layer.MountCount++
|
||
r.bymount[layer.MountPoint] = layer
|
||
err = r.saveMounts()
|
||
}
|
||
return mountpoint, err
|
||
}
|
||
|
||
// unmount decrements (or, with force, zeroes) the in-memory mount count for
// a layer and asks the driver to unmount it once the count reaches zero.
// id may be a layer ID/name or a mount-point path. With conditional set,
// an already-unmounted layer yields ErrLayerNotMounted. The boolean result
// reports whether the layer is still mounted on return.
// Requires startWriting.
func (r *layerStore) unmount(id string, force bool, conditional bool) (bool, error) {
	// LOCKING BUG: This is reachable via store.Diff → layerStore.Diff → layerStore.newFileGetter → simpleGetCloser.Close()
	// (with btrfs and zfs graph drivers) holding layerStore only locked for reading, while it modifies
	// - r.layers[].MountCount (directly and via loadMounts / saveMounts)
	// - r.layers[].MountPoint (directly and via loadMounts / saveMounts)
	// - r.bymount (via loadMounts / saveMounts)

	if !r.lockfile.IsReadWrite() {
		return false, fmt.Errorf("not allowed to update mount locations for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly)
	}
	r.mountsLockfile.Lock()
	defer r.mountsLockfile.Unlock()
	if err := r.reloadMountsIfChanged(); err != nil {
		return false, err
	}
	layer, ok := r.lookup(id)
	if !ok {
		// Not a layer ID or name; maybe the caller passed a mount point path.
		layerByMount, ok := r.bymount[filepath.Clean(id)]
		if !ok {
			return false, ErrLayerUnknown
		}
		layer = layerByMount
	}
	if conditional && layer.MountCount == 0 {
		return false, ErrLayerNotMounted
	}
	if force {
		// Pretend there is exactly one reference left so we really unmount below.
		layer.MountCount = 1
	}
	if layer.MountCount > 1 {
		// Still referenced elsewhere; just drop one reference.
		layer.MountCount--
		return true, r.saveMounts()
	}
	err := r.driver.Put(id)
	if err == nil || os.IsNotExist(err) {
		if layer.MountPoint != "" {
			delete(r.bymount, layer.MountPoint)
		}
		layer.MountCount--
		layer.MountPoint = ""
		return false, r.saveMounts()
	}
	return true, err
}
|
||
|
||
// ParentOwners returns the sorted lists of host UIDs and GIDs that own some
// parent directory of the layer's mount point but have no representation in
// the layer's ID mappings. The layer must be mounted and use ID mappings.
// Requires startReading or startWriting.
func (r *layerStore) ParentOwners(id string) (uids, gids []int, err error) {
	if !r.lockfile.IsReadWrite() {
		return nil, nil, fmt.Errorf("no mount information for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly)
	}
	r.mountsLockfile.RLock()
	defer r.mountsLockfile.Unlock()
	// We are not checking r.mountsLockfile.Modified() and calling r.loadMounts here because the store
	// is only locked for reading = we are not allowed to modify layer data.
	// Holding r.mountsLockfile protects us against concurrent mount/unmount operations.
	layer, ok := r.lookup(id)
	if !ok {
		return nil, nil, ErrLayerUnknown
	}
	if len(layer.UIDMap) == 0 && len(layer.GIDMap) == 0 {
		// We're not using any mappings, so there aren't any unmapped IDs on parent directories.
		return nil, nil, nil
	}
	if layer.MountPoint == "" {
		// We don't know which directories to examine.
		return nil, nil, ErrLayerNotMounted
	}
	// Holding r.mountsLockfile protects us against concurrent mount/unmount operations, but we didn’t
	// hold it continuously since the time we loaded the mount data; so it’s possible the layer
	// was unmounted in the meantime, or mounted elsewhere. Treat that as if we were run after the unmount,
	// = a missing mount, not a filesystem error.
	if _, err := system.Lstat(layer.MountPoint); errors.Is(err, os.ErrNotExist) {
		return nil, nil, ErrLayerNotMounted
	}
	rootuid, rootgid, err := idtools.GetRootUIDGID(layer.UIDMap, layer.GIDMap)
	if err != nil {
		return nil, nil, fmt.Errorf("reading root ID values for layer %q: %w", layer.ID, err)
	}
	m := idtools.NewIDMappingsFromMaps(layer.UIDMap, layer.GIDMap)
	fsuids := make(map[int]struct{})
	fsgids := make(map[int]struct{})
	// Walk every ancestor directory of the mount point, collecting owners
	// (from both Stat and Lstat) that do not map into the container.
	for dir := filepath.Dir(layer.MountPoint); dir != "" && dir != string(os.PathSeparator); dir = filepath.Dir(dir) {
		st, err := system.Stat(dir)
		if err != nil {
			return nil, nil, fmt.Errorf("read directory ownership: %w", err)
		}
		lst, err := system.Lstat(dir)
		if err != nil {
			return nil, nil, err
		}
		fsuid := int(st.UID())
		fsgid := int(st.GID())
		if _, _, err := m.ToContainer(idtools.IDPair{UID: fsuid, GID: rootgid}); err != nil {
			fsuids[fsuid] = struct{}{}
		}
		if _, _, err := m.ToContainer(idtools.IDPair{UID: rootuid, GID: fsgid}); err != nil {
			fsgids[fsgid] = struct{}{}
		}
		fsuid = int(lst.UID())
		fsgid = int(lst.GID())
		if _, _, err := m.ToContainer(idtools.IDPair{UID: fsuid, GID: rootgid}); err != nil {
			fsuids[fsuid] = struct{}{}
		}
		if _, _, err := m.ToContainer(idtools.IDPair{UID: rootuid, GID: fsgid}); err != nil {
			fsgids[fsgid] = struct{}{}
		}
	}
	for uid := range fsuids {
		uids = append(uids, uid)
	}
	for gid := range fsgids {
		gids = append(gids, gid)
	}
	if len(uids) > 1 {
		sort.Ints(uids)
	}
	if len(gids) > 1 {
		sort.Ints(gids)
	}
	return uids, gids, nil
}
|
||
|
||
// removeName drops a single name from the layer's in-memory Names list; the
// caller is responsible for persisting and for updating r.byname.
// Requires startWriting.
func (r *layerStore) removeName(layer *Layer, name string) {
	layer.Names = stringSliceWithoutValue(layer.Names, name)
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) updateNames(id string, names []string, op updateNameOperation) error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to change layer name assignments at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return ErrLayerUnknown
|
||
}
|
||
oldNames := layer.Names
|
||
names, err := applyNameOperation(oldNames, names, op)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
for _, name := range oldNames {
|
||
delete(r.byname, name)
|
||
}
|
||
for _, name := range names {
|
||
if otherLayer, ok := r.byname[name]; ok {
|
||
r.removeName(otherLayer, name)
|
||
}
|
||
r.byname[name] = layer
|
||
}
|
||
layer.Names = names
|
||
return r.saveFor(layer)
|
||
}
|
||
|
||
// datadir returns the directory holding the named layer's big-data items.
func (r *layerStore) datadir(id string) string {
	return filepath.Join(r.layerdir, id)
}
||
|
||
// datapath returns the on-disk path of one named big-data item for a layer.
func (r *layerStore) datapath(id, key string) string {
	return filepath.Join(r.datadir(id), makeBigDataBaseName(key))
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) BigData(id, key string) (io.ReadCloser, error) {
|
||
if key == "" {
|
||
return nil, fmt.Errorf("can't retrieve layer big data value for empty name: %w", ErrInvalidBigDataName)
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return nil, fmt.Errorf("locating layer with ID %q: %w", id, ErrLayerUnknown)
|
||
}
|
||
return os.Open(r.datapath(layer.ID, key))
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) SetBigData(id, key string, data io.Reader) error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to save data items associated with layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return fmt.Errorf("locating layer with ID %q to write bigdata: %w", id, ErrLayerUnknown)
|
||
}
|
||
return r.setBigData(layer, key, data)
|
||
}
|
||
|
||
// setBigData atomically writes data to the big-data file for key under the
// layer's data directory, and records key in layer.BigDataNames (persisting
// the layer) the first time that key is written.
func (r *layerStore) setBigData(layer *Layer, key string, data io.Reader) error {
	if key == "" {
		return fmt.Errorf("can't set empty name for layer big data item: %w", ErrInvalidBigDataName)
	}
	err := os.MkdirAll(r.datadir(layer.ID), 0o700)
	if err != nil {
		return err
	}

	// NewAtomicFileWriter doesn't overwrite/truncate the existing inode.
	// BigData() relies on this behaviour when opening the file for read
	// so that it is either accessing the old data or the new one.
	writer, err := ioutils.NewAtomicFileWriter(r.datapath(layer.ID, key), 0o600)
	if err != nil {
		return fmt.Errorf("opening bigdata file: %w", err)
	}

	if _, err := io.Copy(writer, data); err != nil {
		writer.Close()
		return fmt.Errorf("copying bigdata for the layer: %w", err)
	}
	if err := writer.Close(); err != nil {
		return fmt.Errorf("closing bigdata file for the layer: %w", err)
	}

	// Only record the key (and persist the layer) on first write.
	if !slices.Contains(layer.BigDataNames, key) {
		layer.BigDataNames = append(layer.BigDataNames, key)
		return r.saveFor(layer)
	}
	return nil
}
|
||
|
||
// BigDataNames returns the names of the big-data items stored for a layer.
// Requires startReading or startWriting.
func (r *layerStore) BigDataNames(id string) ([]string, error) {
	layer, ok := r.lookup(id)
	if !ok {
		// NOTE(review): this wraps ErrImageUnknown even though it is a layer
		// lookup — it looks like a copy-paste from the image store. Callers
		// may already match on this sentinel, so confirm before changing it.
		return nil, fmt.Errorf("locating layer with ID %q to retrieve bigdata names: %w", id, ErrImageUnknown)
	}
	return copySlicePreferringNil(layer.BigDataNames), nil
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) Metadata(id string) (string, error) {
|
||
if layer, ok := r.lookup(id); ok {
|
||
return layer.Metadata, nil
|
||
}
|
||
return "", ErrLayerUnknown
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) SetMetadata(id, metadata string) error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to modify layer metadata at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
if layer, ok := r.lookup(id); ok {
|
||
layer.Metadata = metadata
|
||
return r.saveFor(layer)
|
||
}
|
||
return ErrLayerUnknown
|
||
}
|
||
|
||
// tspath returns the path of the layer's compressed tar-split data file.
func (r *layerStore) tspath(id string) string {
	return filepath.Join(r.layerdir, id+tarSplitSuffix)
}
|
||
|
||
// layerHasIncompleteFlag returns true if layer.Flags contains an incompleteFlag set to true
|
||
// The caller must hold r.inProcessLock for reading.
|
||
func layerHasIncompleteFlag(layer *Layer) bool {
|
||
if layer.Flags == nil {
|
||
return false
|
||
}
|
||
if flagValue, ok := layer.Flags[incompleteFlag]; ok {
|
||
if b, ok := flagValue.(bool); ok && b {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// deleteInternal removes a layer's driver storage, tar-split and big-data
// files, and all in-memory index entries, after first flagging the layer
// incomplete on disk so that an interruption leaves it cleanable. The caller
// is responsible for calling saveFor afterwards.
// Requires startWriting.
func (r *layerStore) deleteInternal(id string) error {
	if !r.lockfile.IsReadWrite() {
		return fmt.Errorf("not allowed to delete layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	// Ensure that if we are interrupted, the layer will be cleaned up.
	if !layerHasIncompleteFlag(layer) {
		if layer.Flags == nil {
			layer.Flags = make(map[string]any)
		}
		layer.Flags[incompleteFlag] = true
		if err := r.saveFor(layer); err != nil {
			return err
		}
	}
	// We never unset incompleteFlag; below, we remove the entire object from r.layers.
	id = layer.ID
	if err := r.driver.Remove(id); err != nil && !errors.Is(err, os.ErrNotExist) {
		return err
	}
	// Best-effort removal of auxiliary files; errors are intentionally ignored.
	os.Remove(r.tspath(id))
	os.RemoveAll(r.datadir(id))
	delete(r.byid, id)
	for _, name := range layer.Names {
		delete(r.byname, name)
	}
	// This can only fail if the ID is already missing, which shouldn’t
	// happen — and in that case the index is already in the desired state
	// anyway. The store’s Delete method is used on various paths to
	// recover from failures, so this should be robust against partially
	// missing data.
	_ = r.idindex.Delete(id)
	mountLabel := layer.MountLabel
	if layer.MountPoint != "" {
		delete(r.bymount, layer.MountPoint)
	}
	r.deleteInDigestMap(id)
	r.layers = slices.DeleteFunc(r.layers, func(candidate *Layer) bool {
		return candidate.ID == id
	})
	// Release the SELinux label only if no remaining layer still shares it.
	if mountLabel != "" && !slices.ContainsFunc(r.layers, func(candidate *Layer) bool {
		return candidate.MountLabel == mountLabel
	}) {
		selinux.ReleaseLabel(mountLabel)
	}
	return nil
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) deleteInDigestMap(id string) {
|
||
for digest, layers := range r.bycompressedsum {
|
||
if i := slices.Index(layers, id); i != -1 {
|
||
layers = slices.Delete(layers, i, i+1)
|
||
r.bycompressedsum[digest] = layers
|
||
}
|
||
}
|
||
for digest, layers := range r.byuncompressedsum {
|
||
if i := slices.Index(layers, id); i != -1 {
|
||
layers = slices.Delete(layers, i, i+1)
|
||
r.byuncompressedsum[digest] = layers
|
||
}
|
||
}
|
||
}
|
||
|
||
// Delete unmounts the layer repeatedly (until its mount count reaches zero)
// and then removes it from disk and from all in-memory state, persisting the
// updated layer list.
// Requires startWriting.
func (r *layerStore) Delete(id string) error {
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	id = layer.ID
	// The layer may already have been explicitly unmounted, but if not, we
	// should try to clean that up before we start deleting anything at the
	// driver level.
	for {
		_, err := r.unmount(id, false, true)
		if err == ErrLayerNotMounted {
			// Fully unmounted (or never mounted); proceed with deletion.
			break
		}
		if err != nil {
			return err
		}
	}
	if err := r.deleteInternal(id); err != nil {
		return err
	}
	return r.saveFor(layer)
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) Exists(id string) bool {
|
||
_, ok := r.lookup(id)
|
||
return ok
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) Get(id string) (*Layer, error) {
|
||
if layer, ok := r.lookup(id); ok {
|
||
return copyLayer(layer), nil
|
||
}
|
||
return nil, ErrLayerUnknown
|
||
}
|
||
|
||
// Requires startWriting.
|
||
func (r *layerStore) Wipe() error {
|
||
if !r.lockfile.IsReadWrite() {
|
||
return fmt.Errorf("not allowed to delete layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
|
||
}
|
||
ids := make([]string, 0, len(r.byid))
|
||
for id := range r.byid {
|
||
ids = append(ids, id)
|
||
}
|
||
sort.Slice(ids, func(i, j int) bool {
|
||
return r.byid[ids[i]].Created.After(r.byid[ids[j]].Created)
|
||
})
|
||
for _, id := range ids {
|
||
if err := r.Delete(id); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
ids, err := r.driver.ListLayers()
|
||
if err != nil {
|
||
if !errors.Is(err, drivers.ErrNotSupported) {
|
||
return err
|
||
}
|
||
ids = nil
|
||
}
|
||
for _, id := range ids {
|
||
if err := r.driver.Remove(id); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// findParentAndLayer resolves to (and, when non-empty, from) to layer
// records, defaulting from to to's parent. It returns the canonicalized IDs
// along with the records; fromLayer may be nil if the "from" side could not
// be resolved, in which case fromID is left as the caller-supplied string.
// Requires startReading or startWriting.
func (r *layerStore) findParentAndLayer(from, to string) (fromID string, toID string, fromLayer, toLayer *Layer, err error) {
	var ok bool
	toLayer, ok = r.lookup(to)
	if !ok {
		return "", "", nil, nil, ErrLayerUnknown
	}
	to = toLayer.ID
	if from == "" {
		from = toLayer.Parent
	}
	if from != "" {
		fromLayer, ok = r.lookup(from)
		if ok {
			from = fromLayer.ID
		} else {
			// Fall back to the parent of the "to" layer.
			fromLayer, ok = r.lookup(toLayer.Parent)
			if ok {
				from = fromLayer.ID
			}
		}
	}
	return from, to, fromLayer, toLayer, nil
}
|
||
|
||
// The caller must hold r.inProcessLock for reading.
|
||
func (r *layerStore) layerMappings(layer *Layer) *idtools.IDMappings {
|
||
if layer == nil {
|
||
return &idtools.IDMappings{}
|
||
}
|
||
return idtools.NewIDMappingsFromMaps(layer.UIDMap, layer.GIDMap)
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
//
|
||
// NOTE: Overlay’s implementation assumes use of an exclusive lock over the primary layer store,
|
||
// see drivers/overlay.Driver.getMergedDir.
|
||
func (r *layerStore) Changes(from, to string) ([]archive.Change, error) {
|
||
from, to, fromLayer, toLayer, err := r.findParentAndLayer(from, to)
|
||
if err != nil {
|
||
return nil, ErrLayerUnknown
|
||
}
|
||
return r.driver.Changes(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
|
||
}
|
||
|
||
// simpleGetCloser serves files out of a mounted layer; Close releases the
// mount reference acquired when it was created.
type simpleGetCloser struct {
	r    *layerStore // store which mounted the layer
	path string      // mount point of the layer
	id   string      // layer ID, used to unmount on Close
}
|
||
|
||
// Get opens the file at path, interpreted relative to the layer's mount point.
func (s *simpleGetCloser) Get(path string) (io.ReadCloser, error) {
	return os.Open(filepath.Join(s.path, path))
}
|
||
|
||
// Close releases the mount reference taken when the getter was created.
// LOCKING BUG: See the comments in layerStore.Diff
func (s *simpleGetCloser) Close() error {
	_, err := s.r.unmount(s.id, false, false)
	return err
}
|
||
|
||
// LOCKING BUG: See the comments in layerStore.Diff
|
||
func (r *layerStore) newFileGetter(id string) (drivers.FileGetCloser, error) {
|
||
if getter, ok := r.driver.(drivers.DiffGetterDriver); ok {
|
||
fgc, err := getter.DiffGetter(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if fgc != nil {
|
||
return fgc, nil
|
||
}
|
||
}
|
||
|
||
path, err := r.Mount(id, drivers.MountOpts{Options: []string{"ro"}})
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return &simpleGetCloser{
|
||
r: r,
|
||
path: path,
|
||
id: id,
|
||
}, nil
|
||
}
|
||
|
||
// writeCompressedData copies data from source to compressor, which is on top of pwriter.
|
||
func writeCompressedData(compressor io.WriteCloser, source io.ReadCloser) error {
|
||
defer compressor.Close()
|
||
defer source.Close()
|
||
_, err := io.Copy(compressor, source)
|
||
return err
|
||
}
|
||
|
||
// writeCompressedDataGoroutine copies data from source to compressor, which is on top of pwriter.
// All error must be reported by updating pwriter.
func writeCompressedDataGoroutine(pwriter *io.PipeWriter, compressor io.WriteCloser, source io.ReadCloser) {
	// err starts out non-nil so that a panic inside writeCompressedData is
	// still reported to the pipe's reader as a failure.
	err := errors.New("internal error: unexpected panic in writeCompressedDataGoroutine")
	defer func() { // Note that this is not the same as {defer dest.CloseWithError(err)}; we need err to be evaluated lazily.
		_ = pwriter.CloseWithError(err) // CloseWithError(nil) is equivalent to Close(), always returns nil
	}()
	err = writeCompressedData(compressor, source)
}
|
||
|
||
// Diff returns a tarstream of the changes from the "from" layer (defaulting
// to to's parent) to the "to" layer, optionally recompressed per options.
// It prefers, in order: the driver's native diff (when from is not the
// parent), an additional-layer store blob, a reconstruction from saved
// tar-split data, and finally the driver's diff as a fallback.
// Requires startReading or startWriting.
//
// NOTE: Overlay’s implementation assumes use of an exclusive lock over the primary layer store,
// see drivers/overlay.Driver.getMergedDir.
func (r *layerStore) Diff(from, to string, options *DiffOptions) (io.ReadCloser, error) {
	var metadata storage.Unpacker

	from, to, fromLayer, toLayer, err := r.findParentAndLayer(from, to)
	if err != nil {
		return nil, ErrLayerUnknown
	}
	// Default to applying the type of compression that we noted was used
	// for the layerdiff when it was applied.
	compression := toLayer.CompressionType
	// If a particular compression type (or no compression) was selected,
	// use that instead.
	if options != nil && options.Compression != nil {
		compression = *options.Compression
	}
	maybeCompressReadCloser := func(rc io.ReadCloser) (io.ReadCloser, error) {
		// Depending on whether or not compression is desired, return either the
		// passed-in ReadCloser, or a new one that provides its readers with a
		// compressed version of the data that the original would have provided
		// to its readers.
		if compression == archive.Uncompressed {
			return rc, nil
		}
		preader, pwriter := io.Pipe()
		compressor, err := archive.CompressStream(pwriter, compression)
		if err != nil {
			rc.Close()
			pwriter.Close()
			preader.Close()
			return nil, err
		}
		go writeCompressedDataGoroutine(pwriter, compressor, rc)
		return preader, nil
	}

	// Not a parent/child pair: the saved tar-split data doesn't apply, so
	// ask the driver to compute the diff directly.
	if from != toLayer.Parent {
		diff, err := r.driver.Diff(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
		if err != nil {
			return nil, err
		}
		return maybeCompressReadCloser(diff)
	}

	if ad, ok := r.driver.(drivers.AdditionalLayerStoreDriver); ok {
		if aLayer, err := ad.LookupAdditionalLayerByID(to); err == nil {
			// This is an additional layer. We leverage blob API for acquiring the reproduced raw blob.
			info, err := aLayer.Info()
			if err != nil {
				aLayer.Release()
				return nil, err
			}
			defer info.Close()
			layer := &Layer{}
			if err := json.NewDecoder(info).Decode(layer); err != nil {
				aLayer.Release()
				return nil, err
			}
			blob, err := aLayer.Blob()
			if err != nil {
				aLayer.Release()
				return nil, err
			}
			// If layer compression type is different from the expected one, decompress and convert it.
			if compression != layer.CompressionType {
				diff, err := archive.DecompressStream(blob)
				if err != nil {
					if err2 := blob.Close(); err2 != nil {
						err = fmt.Errorf("failed to close blob file: %v: %w", err2, err)
					}
					aLayer.Release()
					return nil, err
				}
				rc, err := maybeCompressReadCloser(diff)
				if err != nil {
					if err2 := closeAll(blob.Close, diff.Close); err2 != nil {
						err = fmt.Errorf("failed to cleanup: %v: %w", err2, err)
					}
					aLayer.Release()
					return nil, err
				}
				return ioutils.NewReadCloserWrapper(rc, func() error {
					defer aLayer.Release()
					return closeAll(blob.Close, rc.Close)
				}), nil
			}
			return ioutils.NewReadCloserWrapper(blob, func() error { defer aLayer.Release(); return blob.Close() }), nil
		}
	}

	tsfile, err := os.Open(r.tspath(to))
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, err
		}
		// No saved tar-split data; fall back to the driver's diff.
		diff, err := r.driver.Diff(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
		if err != nil {
			return nil, err
		}
		return maybeCompressReadCloser(diff)
	}

	decompressor, err := pgzip.NewReader(tsfile)
	if err != nil {
		if e := tsfile.Close(); e != nil {
			logrus.Debug(e)
		}
		return nil, err
	}

	metadata = storage.NewJSONUnpacker(decompressor)

	// LOCKING BUG: With btrfs and zfs graph drivers), this uses r.Mount() and r.unmount() holding layerStore only locked for reading
	// but they modify in-memory state.
	fgetter, err := r.newFileGetter(to)
	if err != nil {
		errs := fmt.Errorf("creating file-getter: %w", err)
		if err := decompressor.Close(); err != nil {
			errs = errors.Join(errs, fmt.Errorf("closing decompressor: %w", err))
		}
		if err := tsfile.Close(); err != nil {
			errs = errors.Join(errs, fmt.Errorf("closing tarstream headers: %w", err))
		}
		return nil, errs
	}

	// Reassemble the original tarstream from the tar-split headers plus the
	// file contents served by fgetter.
	tarstream := asm.NewOutputTarStream(fgetter, metadata)
	rc := ioutils.NewReadCloserWrapper(tarstream, func() error {
		var errs error
		if err := decompressor.Close(); err != nil {
			errs = errors.Join(errs, fmt.Errorf("closing decompressor: %w", err))
		}
		if err := tsfile.Close(); err != nil {
			errs = errors.Join(errs, fmt.Errorf("closing tarstream headers: %w", err))
		}
		if err := tarstream.Close(); err != nil {
			errs = errors.Join(errs, fmt.Errorf("closing reconstructed tarstream: %w", err))
		}
		if err := fgetter.Close(); err != nil {
			errs = errors.Join(errs, fmt.Errorf("closing file-getter: %w", err))
		}
		if errs != nil {
			return errs
		}
		return nil
	})
	return maybeCompressReadCloser(rc)
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) DiffSize(from, to string) (size int64, err error) {
|
||
var fromLayer, toLayer *Layer
|
||
from, to, fromLayer, toLayer, err = r.findParentAndLayer(from, to)
|
||
if err != nil {
|
||
return -1, ErrLayerUnknown
|
||
}
|
||
return r.driver.DiffSize(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
|
||
}
|
||
|
||
func updateDigestMap(m *map[digest.Digest][]string, oldvalue, newvalue digest.Digest, id string) {
|
||
var newList []string
|
||
if oldvalue != "" {
|
||
for _, value := range (*m)[oldvalue] {
|
||
if value != id {
|
||
newList = append(newList, value)
|
||
}
|
||
}
|
||
if len(newList) > 0 {
|
||
(*m)[oldvalue] = newList
|
||
} else {
|
||
delete(*m, oldvalue)
|
||
}
|
||
}
|
||
if newvalue != "" {
|
||
(*m)[newvalue] = append((*m)[newvalue], id)
|
||
}
|
||
}
|
||
|
||
// ApplyDiff extracts the (possibly compressed) tarstream diff into the layer
// identified by to, using default (nil) layer options.
// Requires startWriting.
func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error) {
	return r.applyDiffWithOptions(to, nil, diff)
}
|
||
|
||
// applyDiffWithOptions extracts diff (optionally compressed) into the layer,
// saving its tar-split data, computing/recording compressed and uncompressed
// digests and sizes (preferring digests supplied via layerOptions), noting
// the UIDs/GIDs present in the tar headers, and persisting the layer. It
// returns the size reported by the driver.
// Requires startWriting.
func (r *layerStore) applyDiffWithOptions(to string, layerOptions *LayerOptions, diff io.Reader) (size int64, err error) {
	if !r.lockfile.IsReadWrite() {
		return -1, fmt.Errorf("not allowed to modify layer contents at %q: %w", r.layerdir, ErrStoreIsReadOnly)
	}

	layer, ok := r.lookup(to)
	if !ok {
		return -1, ErrLayerUnknown
	}

	// Sniff the start of the stream to detect its compression type, then
	// stitch the consumed bytes back in front of the remaining stream.
	header := make([]byte, 10240)
	n, err := diff.Read(header)
	if err != nil && err != io.EOF {
		return -1, err
	}
	compression := archive.DetectCompression(header[:n])
	defragmented := io.MultiReader(bytes.NewReader(header[:n]), diff)

	// Decide if we need to compute digests
	var compressedDigest, uncompressedDigest digest.Digest   // = ""
	var compressedDigester, uncompressedDigester digest.Digester // = nil
	if layerOptions != nil && layerOptions.OriginalDigest != "" &&
		layerOptions.OriginalDigest.Algorithm() == digest.Canonical {
		compressedDigest = layerOptions.OriginalDigest
	} else {
		compressedDigester = digest.Canonical.Digester()
	}
	if layerOptions != nil && layerOptions.UncompressedDigest != "" &&
		layerOptions.UncompressedDigest.Algorithm() == digest.Canonical {
		uncompressedDigest = layerOptions.UncompressedDigest
	} else if compression != archive.Uncompressed {
		uncompressedDigester = digest.Canonical.Digester()
	}

	var compressedWriter io.Writer
	if compressedDigester != nil {
		compressedWriter = compressedDigester.Hash()
	} else {
		compressedWriter = io.Discard
	}
	compressedCounter := ioutils.NewWriteCounter(compressedWriter)
	defragmented = io.TeeReader(defragmented, compressedCounter)

	tsdata := bytes.Buffer{}
	uidLog := make(map[uint32]struct{})
	gidLog := make(map[uint32]struct{})
	var uncompressedCounter *ioutils.WriteCounter

	size, err = func() (int64, error) { // A scope for defer
		compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
		if err != nil {
			return -1, err
		}
		defer compressor.Close() // This must happen before tsdata is consumed.
		if err := compressor.SetConcurrency(1024*1024, 1); err != nil { // 1024*1024 is the hard-coded default; we're not changing that
			logrus.Infof("setting compression concurrency threads to 1: %v; ignoring", err)
		}
		metadata := storage.NewJSONPacker(compressor)
		uncompressed, err := archive.DecompressStream(defragmented)
		if err != nil {
			return -1, err
		}
		defer uncompressed.Close()
		// Record the UIDs/GIDs of non-whiteout entries as they stream past.
		idLogger, err := tarlog.NewLogger(func(h *tar.Header) {
			if !strings.HasPrefix(path.Base(h.Name), archive.WhiteoutPrefix) {
				uidLog[uint32(h.Uid)] = struct{}{}
				gidLog[uint32(h.Gid)] = struct{}{}
			}
		})
		if err != nil {
			return -1, err
		}
		defer idLogger.Close() // This must happen before uidLog and gidLog is consumed.
		uncompressedCounter = ioutils.NewWriteCounter(idLogger)
		uncompressedWriter := (io.Writer)(uncompressedCounter)
		if uncompressedDigester != nil {
			uncompressedWriter = io.MultiWriter(uncompressedWriter, uncompressedDigester.Hash())
		}
		payload, err := asm.NewInputTarStream(io.TeeReader(uncompressed, uncompressedWriter), metadata, storage.NewDiscardFilePutter())
		if err != nil {
			return -1, err
		}
		options := drivers.ApplyDiffOpts{
			Diff:       payload,
			Mappings:   r.layerMappings(layer),
			MountLabel: layer.MountLabel,
		}
		size, err := r.driver.ApplyDiff(layer.ID, layer.Parent, options)
		if err != nil {
			return -1, err
		}
		return size, err
	}()
	if err != nil {
		return -1, err
	}

	// Persist the tar-split data collected above.
	if err := os.MkdirAll(filepath.Dir(r.tspath(layer.ID)), 0o700); err != nil {
		return -1, err
	}
	if err := ioutils.AtomicWriteFile(r.tspath(layer.ID), tsdata.Bytes(), 0o600); err != nil {
		return -1, err
	}
	if compressedDigester != nil {
		compressedDigest = compressedDigester.Digest()
	}
	if uncompressedDigester != nil {
		uncompressedDigest = uncompressedDigester.Digest()
	}
	// For an uncompressed input stream the two digests are the same value.
	if uncompressedDigest == "" && compression == archive.Uncompressed {
		uncompressedDigest = compressedDigest
	}

	updateDigestMap(&r.bycompressedsum, layer.CompressedDigest, compressedDigest, layer.ID)
	layer.CompressedDigest = compressedDigest
	if layerOptions != nil && layerOptions.OriginalDigest != "" && layerOptions.OriginalSize != nil {
		layer.CompressedSize = *layerOptions.OriginalSize
	} else {
		layer.CompressedSize = compressedCounter.Count
	}
	updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, uncompressedDigest, layer.ID)
	layer.UncompressedDigest = uncompressedDigest
	layer.UncompressedSize = uncompressedCounter.Count
	layer.CompressionType = compression
	layer.UIDs = make([]uint32, 0, len(uidLog))
	for uid := range uidLog {
		layer.UIDs = append(layer.UIDs, uid)
	}
	slices.Sort(layer.UIDs)
	layer.GIDs = make([]uint32, 0, len(gidLog))
	for gid := range gidLog {
		layer.GIDs = append(layer.GIDs, gid)
	}
	slices.Sort(layer.GIDs)

	err = r.saveFor(layer)

	return size, err
}
|
||
|
||
// Requires (startReading or?) startWriting.
|
||
func (r *layerStore) DifferTarget(id string) (string, error) {
|
||
ddriver, ok := r.driver.(drivers.DriverWithDiffer)
|
||
if !ok {
|
||
return "", ErrNotSupported
|
||
}
|
||
layer, ok := r.lookup(id)
|
||
if !ok {
|
||
return "", ErrLayerUnknown
|
||
}
|
||
return ddriver.DifferTarget(layer.ID)
|
||
}
|
||
|
||
// applyDiffFromStagingDirectory moves a staged (differ-produced) diff into
// the layer, updates the digest indexes, persists the layer, stores the
// tar-split data (if any), and writes any associated big-data items,
// deleting the layer if a big-data write fails.
// Requires startWriting.
func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error {
	ddriver, ok := r.driver.(drivers.DriverWithDiffer)
	if !ok {
		return ErrNotSupported
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	if options == nil {
		// Default to the layer's own mappings and mount label.
		options = &drivers.ApplyDiffWithDifferOpts{
			ApplyDiffOpts: drivers.ApplyDiffOpts{
				Mappings:   r.layerMappings(layer),
				MountLabel: layer.MountLabel,
			},
			Flags: nil,
		}
	}

	err := ddriver.ApplyDiffFromStagingDirectory(layer.ID, layer.Parent, diffOutput, options)
	if err != nil {
		return err
	}
	layer.UIDs = diffOutput.UIDs
	layer.GIDs = diffOutput.GIDs
	updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, diffOutput.UncompressedDigest, layer.ID)
	layer.UncompressedDigest = diffOutput.UncompressedDigest
	updateDigestMap(&r.bycompressedsum, layer.CompressedDigest, diffOutput.CompressedDigest, layer.ID)
	layer.CompressedDigest = diffOutput.CompressedDigest
	updateDigestMap(&r.bytocsum, layer.TOCDigest, diffOutput.TOCDigest, layer.ID)
	layer.TOCDigest = diffOutput.TOCDigest
	layer.UncompressedSize = diffOutput.Size
	layer.Metadata = diffOutput.Metadata
	if options != nil && options.Flags != nil {
		if layer.Flags == nil {
			layer.Flags = make(map[string]any)
		}
		maps.Copy(layer.Flags, options.Flags)
	}
	if err = r.saveFor(layer); err != nil {
		return err
	}

	if diffOutput.TarSplit != nil {
		// Persist the tar-split data, gzip-compressed, next to the layer.
		tsdata := bytes.Buffer{}
		compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
		if err != nil {
			compressor = pgzip.NewWriter(&tsdata)
		}
		if err := compressor.SetConcurrency(1024*1024, 1); err != nil { // 1024*1024 is the hard-coded default; we're not changing that
			logrus.Infof("setting compression concurrency threads to 1: %v; ignoring", err)
		}
		if _, err := compressor.Write(diffOutput.TarSplit); err != nil {
			compressor.Close()
			return err
		}
		compressor.Close()
		if err := os.MkdirAll(filepath.Dir(r.tspath(layer.ID)), 0o700); err != nil {
			return err
		}
		if err := ioutils.AtomicWriteFile(r.tspath(layer.ID), tsdata.Bytes(), 0o600); err != nil {
			return err
		}
	}
	for k, v := range diffOutput.BigData {
		if err := r.SetBigData(id, k, bytes.NewReader(v)); err != nil {
			if err2 := r.Delete(id); err2 != nil {
				logrus.Errorf("While recovering from a failure to set big data, error deleting layer %#v: %v", id, err2)
			}
			return err
		}
	}
	return err
}
|
||
|
||
// applyDiffWithDifferNoLock runs differ via the graph driver and returns the
// driver's output, or ErrNotSupported for drivers without differ support.
// It must be called without any c/storage locks held to allow differ to make c/storage calls.
func (r *layerStore) applyDiffWithDifferNoLock(options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) {
	ddriver, ok := r.driver.(drivers.DriverWithDiffer)
	if !ok {
		return nil, ErrNotSupported
	}

	output, err := ddriver.ApplyDiffWithDiffer(options, differ)
	return &output, err
}
|
||
|
||
func (r *layerStore) CleanupStagingDirectory(stagingDirectory string) error {
|
||
ddriver, ok := r.driver.(drivers.DriverWithDiffer)
|
||
if !ok {
|
||
return ErrNotSupported
|
||
}
|
||
return ddriver.CleanupStagingDirectory(stagingDirectory)
|
||
}
|
||
|
||
// Requires startReading or startWriting.
|
||
func (r *layerStore) layersByDigestMap(m map[digest.Digest][]string, d digest.Digest) ([]Layer, error) {
|
||
var layers []Layer
|
||
for _, layerID := range m[d] {
|
||
layer, ok := r.lookup(layerID)
|
||
if !ok {
|
||
return nil, ErrLayerUnknown
|
||
}
|
||
layers = append(layers, *copyLayer(layer))
|
||
}
|
||
return layers, nil
|
||
}
|
||
|
||
// LayersByCompressedDigest returns the layers recorded with the given
// compressed-content digest.
// Requires startReading or startWriting.
func (r *layerStore) LayersByCompressedDigest(d digest.Digest) ([]Layer, error) {
	return r.layersByDigestMap(r.bycompressedsum, d)
}
|
||
|
||
// LayersByUncompressedDigest returns the layers recorded with the given
// uncompressed-content digest.
// Requires startReading or startWriting.
func (r *layerStore) LayersByUncompressedDigest(d digest.Digest) ([]Layer, error) {
	return r.layersByDigestMap(r.byuncompressedsum, d)
}
|
||
|
||
// LayersByTOCDigest returns the layers recorded with the given TOC digest.
// Requires startReading or startWriting.
func (r *layerStore) LayersByTOCDigest(d digest.Digest) ([]Layer, error) {
	return r.layersByDigestMap(r.bytocsum, d)
}
|
||
|
||
// dedup asks the graph driver to deduplicate storage as described by req.
// Requires startWriting.
func (r *layerStore) dedup(req drivers.DedupArgs) (drivers.DedupResult, error) {
	return r.driver.Dedup(req)
}
|
||
|
||
func closeAll(closes ...func() error) (rErr error) {
|
||
for _, f := range closes {
|
||
if err := f(); err != nil {
|
||
if rErr == nil {
|
||
rErr = fmt.Errorf("close error: %w", err)
|
||
continue
|
||
}
|
||
rErr = fmt.Errorf("%v: %w", err, rErr)
|
||
}
|
||
}
|
||
return
|
||
}
|