mirror of
https://github.com/containers/skopeo.git
synced 2025-04-27 19:05:32 +00:00
2774 lines
88 KiB
Go
2774 lines
88 KiB
Go
//go:build linux
|
||
|
||
package overlay
|
||
|
||
import (
|
||
"bytes"
|
||
"encoding/base64"
|
||
"errors"
|
||
"fmt"
|
||
"io"
|
||
"io/fs"
|
||
"os"
|
||
"os/exec"
|
||
"path"
|
||
"path/filepath"
|
||
"slices"
|
||
"strconv"
|
||
"strings"
|
||
"sync"
|
||
"syscall"
|
||
|
||
graphdriver "github.com/containers/storage/drivers"
|
||
"github.com/containers/storage/drivers/overlayutils"
|
||
"github.com/containers/storage/drivers/quota"
|
||
"github.com/containers/storage/internal/dedup"
|
||
"github.com/containers/storage/pkg/archive"
|
||
"github.com/containers/storage/pkg/chrootarchive"
|
||
"github.com/containers/storage/pkg/directory"
|
||
"github.com/containers/storage/pkg/fileutils"
|
||
"github.com/containers/storage/pkg/fsutils"
|
||
"github.com/containers/storage/pkg/idmap"
|
||
"github.com/containers/storage/pkg/idtools"
|
||
"github.com/containers/storage/pkg/lockfile"
|
||
"github.com/containers/storage/pkg/mount"
|
||
"github.com/containers/storage/pkg/parsers"
|
||
"github.com/containers/storage/pkg/system"
|
||
"github.com/containers/storage/pkg/unshare"
|
||
units "github.com/docker/go-units"
|
||
digest "github.com/opencontainers/go-digest"
|
||
"github.com/opencontainers/selinux/go-selinux"
|
||
"github.com/opencontainers/selinux/go-selinux/label"
|
||
"github.com/sirupsen/logrus"
|
||
"golang.org/x/sys/unix"
|
||
)
|
||
|
||
// untar defines the untar method
|
||
var untar = chrootarchive.UntarUncompressed
|
||
|
||
const (
|
||
defaultPerms = os.FileMode(0o555)
|
||
selinuxLabelTest = "system_u:object_r:container_file_t:s0"
|
||
mountProgramFlagFile = ".has-mount-program"
|
||
)
|
||
|
||
// This backend uses the overlay union filesystem for containers
|
||
// with diff directories for each layer.
|
||
|
||
// This version of the overlay driver requires at least kernel
|
||
// 4.0.0 in order to support mounting multiple diff directories.
|
||
|
||
// Each container/image has at least a "diff" directory and "link" file.
|
||
// If there is also a "lower" file when there are diff layers
|
||
// below as well as "merged" and "work" directories. The "diff" directory
|
||
// has the upper layer of the overlay and is used to capture any
|
||
// changes to the layer. The "lower" file contains all the lower layer
|
||
// mounts separated by ":" and ordered from uppermost to lowermost
|
||
// layers. The overlay itself is mounted in the "merged" directory,
|
||
// and the "work" dir is needed for overlay to work.
|
||
|
||
// The "link" file for each layer contains a unique string for the layer.
|
||
// Under the "l" directory at the root there will be a symbolic link
|
||
// with that unique string pointing the "diff" directory for the layer.
|
||
// The symbolic links are used to reference lower layers in the "lower"
|
||
// file and on mount. The links are used to shorten the total length
|
||
// of a layer reference without requiring changes to the layer identifier
|
||
// or root directory. Mounts are always done relative to root and
|
||
// referencing the symbolic links in order to ensure the number of
|
||
// lower directories can fit in a single page for making the mount
|
||
// syscall. A hard upper limit of 500 lower layers is enforced to ensure
|
||
// that mounts do not fail due to length.
|
||
|
||
const (
|
||
linkDir = "l"
|
||
stagingDir = "staging"
|
||
lowerFile = "lower"
|
||
maxDepth = 500
|
||
|
||
stagingLockFile = "staging.lock"
|
||
|
||
tocArtifact = "toc"
|
||
fsVerityDigestsArtifact = "fs-verity-digests"
|
||
|
||
// idLength represents the number of random characters
|
||
// which can be used to create the unique link identifier
|
||
// for every layer. If this value is too long then the
|
||
// page size limit for the mount command may be exceeded.
|
||
// The idLength should be selected such that following equation
|
||
// is true (512 is a buffer for label metadata, 128 is the
|
||
// number of lowers we want to be able to use without having
|
||
// to use bind mounts to get all the way to the kernel limit).
|
||
// ((idLength + len(linkDir) + 1) * 128) <= (pageSize - 512)
|
||
idLength = 26
|
||
)
|
||
|
||
type overlayOptions struct {
|
||
imageStores []string
|
||
layerStores []additionalLayerStore
|
||
quota quota.Quota
|
||
mountProgram string
|
||
skipMountHome bool
|
||
mountOptions string
|
||
ignoreChownErrors bool
|
||
forceMask *os.FileMode
|
||
useComposefs bool
|
||
}
|
||
|
||
// Driver contains information about the home directory and the list of active mounts that are created using this driver.
|
||
type Driver struct {
|
||
name string
|
||
home string
|
||
runhome string
|
||
imageStore string
|
||
ctr *graphdriver.RefCounter
|
||
quotaCtl *quota.Control
|
||
options overlayOptions
|
||
naiveDiff graphdriver.DiffDriver
|
||
supportsDType bool
|
||
supportsVolatile *bool
|
||
supportsDataOnly *bool
|
||
usingMetacopy bool
|
||
usingComposefs bool
|
||
|
||
stagingDirsLocksMutex sync.Mutex
|
||
// stagingDirsLocks access is not thread safe, it is required that callers take
|
||
// stagingDirsLocksMutex on each access to guard against concurrent map writes.
|
||
stagingDirsLocks map[string]*lockfile.LockFile
|
||
|
||
supportsIDMappedMounts *bool
|
||
}
|
||
|
||
type additionalLayerStore struct {
|
||
// path is the directory where this store is available on the host.
|
||
path string
|
||
|
||
// withReference is true when the store contains image reference information (base64-encoded)
|
||
// in its layer search path so the path to the diff will be
|
||
// <path>/base64(reference)/<layerdigest>/
|
||
withReference bool
|
||
}
|
||
|
||
var (
|
||
backingFs = "<unknown>"
|
||
projectQuotaSupported = false
|
||
|
||
useNaiveDiffLock sync.Once
|
||
useNaiveDiffOnly bool
|
||
)
|
||
|
||
func init() {
|
||
graphdriver.MustRegister("overlay", Init)
|
||
graphdriver.MustRegister("overlay2", Init)
|
||
}
|
||
|
||
func hasMetacopyOption(opts []string) bool {
|
||
return slices.Contains(opts, "metacopy=on")
|
||
}
|
||
|
||
func getMountProgramFlagFile(path string) string {
|
||
return filepath.Join(path, mountProgramFlagFile)
|
||
}
|
||
|
||
func checkSupportVolatile(home, runhome string) (bool, error) {
|
||
const feature = "volatile"
|
||
volatileCacheResult, _, err := cachedFeatureCheck(runhome, feature)
|
||
var usingVolatile bool
|
||
if err == nil {
|
||
if volatileCacheResult {
|
||
logrus.Debugf("Cached value indicated that volatile is being used")
|
||
} else {
|
||
logrus.Debugf("Cached value indicated that volatile is not being used")
|
||
}
|
||
usingVolatile = volatileCacheResult
|
||
} else {
|
||
usingVolatile, err = doesVolatile(home)
|
||
if err == nil {
|
||
if usingVolatile {
|
||
logrus.Debugf("overlay: test mount indicated that volatile is being used")
|
||
} else {
|
||
logrus.Debugf("overlay: test mount indicated that volatile is not being used")
|
||
}
|
||
if err = cachedFeatureRecord(runhome, feature, usingVolatile, ""); err != nil {
|
||
return false, fmt.Errorf("recording volatile-being-used status: %w", err)
|
||
}
|
||
} else {
|
||
usingVolatile = false
|
||
}
|
||
}
|
||
return usingVolatile, nil
|
||
}
|
||
|
||
// checkAndRecordIDMappedSupport checks and stores if the kernel supports mounting overlay on top of a
|
||
// idmapped lower layer.
|
||
func checkAndRecordIDMappedSupport(home, runhome string) (bool, error) {
|
||
if os.Geteuid() != 0 {
|
||
return false, nil
|
||
}
|
||
|
||
feature := "idmapped-lower-dir"
|
||
overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
|
||
if err == nil {
|
||
if overlayCacheResult {
|
||
logrus.Debugf("Cached value indicated that idmapped mounts for overlay are supported")
|
||
return true, nil
|
||
}
|
||
logrus.Debugf("Cached value indicated that idmapped mounts for overlay are not supported")
|
||
return false, errors.New(overlayCacheText)
|
||
}
|
||
supportsIDMappedMounts, err := supportsIdmappedLowerLayers(home)
|
||
if err2 := cachedFeatureRecord(runhome, feature, supportsIDMappedMounts, ""); err2 != nil {
|
||
return false, fmt.Errorf("recording overlay idmapped mounts support status: %w", err2)
|
||
}
|
||
return supportsIDMappedMounts, err
|
||
}
|
||
|
||
func checkAndRecordOverlaySupport(fsMagic graphdriver.FsMagic, home, runhome string) (bool, error) {
|
||
var supportsDType bool
|
||
|
||
if os.Geteuid() != 0 {
|
||
return false, nil
|
||
}
|
||
|
||
feature := "overlay"
|
||
overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
|
||
if err == nil {
|
||
if overlayCacheResult {
|
||
logrus.Debugf("Cached value indicated that overlay is supported")
|
||
} else {
|
||
logrus.Debugf("Cached value indicated that overlay is not supported")
|
||
}
|
||
supportsDType = overlayCacheResult
|
||
if !supportsDType {
|
||
return false, errors.New(overlayCacheText)
|
||
}
|
||
} else {
|
||
supportsDType, err = supportsOverlay(home, fsMagic, 0, 0)
|
||
if err != nil {
|
||
os.Remove(filepath.Join(home, linkDir))
|
||
os.Remove(home)
|
||
patherr, ok := err.(*os.PathError)
|
||
if ok && patherr.Err == syscall.ENOSPC {
|
||
return false, err
|
||
}
|
||
err = fmt.Errorf("kernel does not support overlay fs: %w", err)
|
||
if err2 := cachedFeatureRecord(runhome, feature, false, err.Error()); err2 != nil {
|
||
return false, fmt.Errorf("recording overlay not being supported (%v): %w", err, err2)
|
||
}
|
||
return false, err
|
||
}
|
||
if err = cachedFeatureRecord(runhome, feature, supportsDType, ""); err != nil {
|
||
return false, fmt.Errorf("recording overlay support status: %w", err)
|
||
}
|
||
}
|
||
return supportsDType, nil
|
||
}
|
||
|
||
func (d *Driver) getSupportsVolatile() (bool, error) {
|
||
if d.supportsVolatile != nil {
|
||
return *d.supportsVolatile, nil
|
||
}
|
||
supportsVolatile, err := checkSupportVolatile(d.home, d.runhome)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
d.supportsVolatile = &supportsVolatile
|
||
return supportsVolatile, nil
|
||
}
|
||
|
||
func (d *Driver) getSupportsDataOnly() (bool, error) {
|
||
if d.supportsDataOnly != nil {
|
||
return *d.supportsDataOnly, nil
|
||
}
|
||
supportsDataOnly, err := supportsDataOnlyLayersCached(d.home, d.runhome)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
d.supportsDataOnly = &supportsDataOnly
|
||
return supportsDataOnly, nil
|
||
}
|
||
|
||
// isNetworkFileSystem checks if the specified file system is supported by native overlay
|
||
// as backing store when running in a user namespace.
|
||
func isNetworkFileSystem(fsMagic graphdriver.FsMagic) bool {
|
||
switch fsMagic {
|
||
// a bunch of network file systems...
|
||
case graphdriver.FsMagicNfsFs, graphdriver.FsMagicSmbFs, graphdriver.FsMagicAcfs,
|
||
graphdriver.FsMagicAfs, graphdriver.FsMagicCephFs, graphdriver.FsMagicCIFS,
|
||
graphdriver.FsMagicGPFS, graphdriver.FsMagicIBRIX,
|
||
graphdriver.FsMagicKAFS, graphdriver.FsMagicLUSTRE, graphdriver.FsMagicNCP,
|
||
graphdriver.FsMagicNFSD, graphdriver.FsMagicOCFS2, graphdriver.FsMagicPANFS,
|
||
graphdriver.FsMagicPRLFS, graphdriver.FsMagicSMB2, graphdriver.FsMagicSNFS,
|
||
graphdriver.FsMagicVBOXSF, graphdriver.FsMagicVXFS:
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
|
||
// Init returns the a native diff driver for overlay filesystem.
|
||
// If overlay filesystem is not supported on the host, a wrapped graphdriver.ErrNotSupported is returned as error.
|
||
// If an overlay filesystem is not supported over an existing filesystem then a wrapped graphdriver.ErrIncompatibleFS is returned.
|
||
func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
|
||
opts, err := parseOptions(options.DriverOptions)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
fsMagic, err := graphdriver.GetFSMagic(home)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
fsName, ok := graphdriver.FsNames[fsMagic]
|
||
if !ok {
|
||
fsName = "<unknown>"
|
||
}
|
||
backingFs = fsName
|
||
|
||
runhome := filepath.Join(options.RunRoot, filepath.Base(home))
|
||
|
||
// Create the driver home dir
|
||
if err := os.MkdirAll(path.Join(home, linkDir), 0o755); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
if options.ImageStore != "" {
|
||
if err := idtools.MkdirAllAs(path.Join(options.ImageStore, linkDir), 0o755, 0, 0); err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
|
||
if err := os.MkdirAll(runhome, 0o700); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
if opts.mountProgram == "" {
|
||
if supported, err := SupportsNativeOverlay(home, runhome); err != nil {
|
||
return nil, err
|
||
} else if !supported {
|
||
if path, err := exec.LookPath("fuse-overlayfs"); err == nil {
|
||
opts.mountProgram = path
|
||
}
|
||
}
|
||
}
|
||
|
||
if opts.mountProgram != "" {
|
||
if unshare.IsRootless() && isNetworkFileSystem(fsMagic) && opts.forceMask == nil {
|
||
m := os.FileMode(0o700)
|
||
opts.forceMask = &m
|
||
logrus.Warnf("Network file system detected as backing store. Enforcing overlay option `force_mask=\"%o\"`. Add it to storage.conf to silence this warning", m)
|
||
}
|
||
|
||
if err := os.WriteFile(getMountProgramFlagFile(home), []byte("true"), 0o600); err != nil {
|
||
return nil, err
|
||
}
|
||
} else {
|
||
// check if they are running over btrfs, aufs, overlay, or ecryptfs
|
||
switch fsMagic {
|
||
case graphdriver.FsMagicAufs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs:
|
||
return nil, fmt.Errorf("'overlay' is not supported over %s, a mount_program is required: %w", backingFs, graphdriver.ErrIncompatibleFS)
|
||
}
|
||
if unshare.IsRootless() && isNetworkFileSystem(fsMagic) {
|
||
return nil, fmt.Errorf("a network file system with user namespaces is not supported. Please use a mount_program: %w", graphdriver.ErrIncompatibleFS)
|
||
}
|
||
}
|
||
|
||
if opts.useComposefs {
|
||
if unshare.IsRootless() {
|
||
return nil, fmt.Errorf("composefs is not supported in user namespaces")
|
||
}
|
||
if _, err := getComposeFsHelper(); err != nil {
|
||
return nil, fmt.Errorf("composefs helper program not found: %w", err)
|
||
}
|
||
}
|
||
|
||
var usingMetacopy bool
|
||
var supportsDType bool
|
||
var supportsVolatile *bool
|
||
if opts.mountProgram != "" {
|
||
supportsDType = true
|
||
t := true
|
||
supportsVolatile = &t
|
||
} else {
|
||
supportsDType, err = checkAndRecordOverlaySupport(fsMagic, home, runhome)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
feature := fmt.Sprintf("metacopy(%s)", opts.mountOptions)
|
||
metacopyCacheResult, _, err := cachedFeatureCheck(runhome, feature)
|
||
if err == nil {
|
||
if metacopyCacheResult {
|
||
logrus.Debugf("Cached value indicated that metacopy is being used")
|
||
} else {
|
||
logrus.Debugf("Cached value indicated that metacopy is not being used")
|
||
}
|
||
usingMetacopy = metacopyCacheResult
|
||
} else {
|
||
usingMetacopy, err = doesMetacopy(home, opts.mountOptions)
|
||
if err == nil {
|
||
if usingMetacopy {
|
||
logrus.Debugf("overlay: test mount indicated that metacopy is being used")
|
||
} else {
|
||
logrus.Debugf("overlay: test mount indicated that metacopy is not being used")
|
||
}
|
||
if err = cachedFeatureRecord(runhome, feature, usingMetacopy, ""); err != nil {
|
||
return nil, fmt.Errorf("recording metacopy-being-used status: %w", err)
|
||
}
|
||
} else {
|
||
logrus.Infof("overlay: test mount did not indicate whether or not metacopy is being used: %v", err)
|
||
return nil, err
|
||
}
|
||
}
|
||
}
|
||
|
||
if !opts.skipMountHome {
|
||
if err := mount.MakePrivate(home); err != nil {
|
||
return nil, fmt.Errorf("overlay: failed to make mount private: %w", err)
|
||
}
|
||
}
|
||
|
||
fileSystemType := graphdriver.FsMagicOverlay
|
||
if opts.mountProgram != "" {
|
||
fileSystemType = graphdriver.FsMagicFUSE
|
||
}
|
||
|
||
d := &Driver{
|
||
name: "overlay",
|
||
home: home,
|
||
imageStore: options.ImageStore,
|
||
runhome: runhome,
|
||
ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(fileSystemType)),
|
||
supportsDType: supportsDType,
|
||
usingMetacopy: usingMetacopy,
|
||
supportsVolatile: supportsVolatile,
|
||
usingComposefs: opts.useComposefs,
|
||
options: *opts,
|
||
stagingDirsLocksMutex: sync.Mutex{},
|
||
stagingDirsLocks: make(map[string]*lockfile.LockFile),
|
||
}
|
||
|
||
d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, graphdriver.NewNaiveLayerIDMapUpdater(d))
|
||
if backingFs == "xfs" {
|
||
// Try to enable project quota support over xfs.
|
||
if d.quotaCtl, err = quota.NewControl(home); err == nil {
|
||
projectQuotaSupported = true
|
||
} else if opts.quota.Size > 0 || opts.quota.Inodes > 0 {
|
||
return nil, fmt.Errorf("storage options overlay.size and overlay.inodes not supported. Filesystem does not support Project Quota: %w", err)
|
||
}
|
||
} else if opts.quota.Size > 0 || opts.quota.Inodes > 0 {
|
||
// if xfs is not the backing fs then error out if the storage-opt overlay.size is used.
|
||
return nil, fmt.Errorf("storage option overlay.size and overlay.inodes only supported for backingFS XFS. Found %v", backingFs)
|
||
}
|
||
|
||
logrus.Debugf("backingFs=%s, projectQuotaSupported=%v, useNativeDiff=%v, usingMetacopy=%v", backingFs, projectQuotaSupported, !d.useNaiveDiff(), d.usingMetacopy)
|
||
|
||
return d, nil
|
||
}
|
||
|
||
func parseOptions(options []string) (*overlayOptions, error) {
|
||
o := &overlayOptions{}
|
||
for _, option := range options {
|
||
key, val, err := parsers.ParseKeyValueOpt(option)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
trimkey := strings.ToLower(key)
|
||
trimkey = strings.TrimPrefix(trimkey, "overlay.")
|
||
trimkey = strings.TrimPrefix(trimkey, "overlay2.")
|
||
trimkey = strings.TrimPrefix(trimkey, ".")
|
||
switch trimkey {
|
||
case "override_kernel_check":
|
||
logrus.Debugf("overlay: override_kernel_check option was specified, but is no longer necessary")
|
||
case "mountopt":
|
||
o.mountOptions = val
|
||
case "size":
|
||
logrus.Debugf("overlay: size=%s", val)
|
||
size, err := units.RAMInBytes(val)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
o.quota.Size = uint64(size)
|
||
case "inodes":
|
||
logrus.Debugf("overlay: inodes=%s", val)
|
||
inodes, err := strconv.ParseUint(val, 10, 64)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
o.quota.Inodes = inodes
|
||
case "imagestore", "additionalimagestore":
|
||
logrus.Debugf("overlay: imagestore=%s", val)
|
||
// Additional read only image stores to use for lower paths
|
||
if val == "" {
|
||
continue
|
||
}
|
||
for _, store := range strings.Split(val, ",") {
|
||
store = filepath.Clean(store)
|
||
if !filepath.IsAbs(store) {
|
||
return nil, fmt.Errorf("overlay: image path %q is not absolute. Can not be relative", store)
|
||
}
|
||
st, err := os.Stat(store)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("overlay: can't stat imageStore dir %s: %w", store, err)
|
||
}
|
||
if !st.IsDir() {
|
||
return nil, fmt.Errorf("overlay: image path %q must be a directory", store)
|
||
}
|
||
o.imageStores = append(o.imageStores, store)
|
||
}
|
||
case "additionallayerstore":
|
||
logrus.Debugf("overlay: additionallayerstore=%s", val)
|
||
// Additional read only layer stores to use for lower paths
|
||
if val == "" {
|
||
continue
|
||
}
|
||
for _, lstore := range strings.Split(val, ",") {
|
||
elems := strings.Split(lstore, ":")
|
||
lstore = filepath.Clean(elems[0])
|
||
if !filepath.IsAbs(lstore) {
|
||
return nil, fmt.Errorf("overlay: additionallayerstore path %q is not absolute. Can not be relative", lstore)
|
||
}
|
||
st, err := os.Stat(lstore)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("overlay: can't stat additionallayerstore dir: %w", err)
|
||
}
|
||
if !st.IsDir() {
|
||
return nil, fmt.Errorf("overlay: additionallayerstore path %q must be a directory", lstore)
|
||
}
|
||
var withReference bool
|
||
for _, e := range elems[1:] {
|
||
switch e {
|
||
case "ref":
|
||
if withReference {
|
||
return nil, fmt.Errorf("overlay: additionallayerstore config of %q contains %q option twice", lstore, e)
|
||
}
|
||
withReference = true
|
||
default:
|
||
return nil, fmt.Errorf("overlay: additionallayerstore config %q contains unknown option %q", lstore, e)
|
||
}
|
||
}
|
||
o.layerStores = append(o.layerStores, additionalLayerStore{
|
||
path: lstore,
|
||
withReference: withReference,
|
||
})
|
||
}
|
||
case "use_composefs":
|
||
logrus.Debugf("overlay: use_composefs=%s", val)
|
||
o.useComposefs, err = strconv.ParseBool(val)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
case "mount_program":
|
||
logrus.Debugf("overlay: mount_program=%s", val)
|
||
if val != "" {
|
||
err := fileutils.Exists(val)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("overlay: can't stat program %q: %w", val, err)
|
||
}
|
||
}
|
||
o.mountProgram = val
|
||
case "skip_mount_home":
|
||
logrus.Debugf("overlay: skip_mount_home=%s", val)
|
||
o.skipMountHome, err = strconv.ParseBool(val)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
case "ignore_chown_errors":
|
||
logrus.Debugf("overlay: ignore_chown_errors=%s", val)
|
||
o.ignoreChownErrors, err = strconv.ParseBool(val)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
case "force_mask":
|
||
logrus.Debugf("overlay: force_mask=%s", val)
|
||
var mask int64
|
||
switch val {
|
||
case "shared":
|
||
mask = 0o755
|
||
case "private":
|
||
mask = 0o700
|
||
default:
|
||
mask, err = strconv.ParseInt(val, 8, 32)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
m := os.FileMode(mask)
|
||
o.forceMask = &m
|
||
default:
|
||
return nil, fmt.Errorf("overlay: unknown option %s", key)
|
||
}
|
||
}
|
||
return o, nil
|
||
}
|
||
|
||
func cachedFeatureSet(feature string, set bool) string {
|
||
if set {
|
||
return fmt.Sprintf("%s-true", feature)
|
||
}
|
||
return fmt.Sprintf("%s-false", feature)
|
||
}
|
||
|
||
func cachedFeatureCheck(runhome, feature string) (supported bool, text string, err error) {
|
||
content, err := os.ReadFile(filepath.Join(runhome, cachedFeatureSet(feature, true)))
|
||
if err == nil {
|
||
return true, string(content), nil
|
||
}
|
||
content, err = os.ReadFile(filepath.Join(runhome, cachedFeatureSet(feature, false)))
|
||
if err == nil {
|
||
return false, string(content), nil
|
||
}
|
||
return false, "", err
|
||
}
|
||
|
||
func cachedFeatureRecord(runhome, feature string, supported bool, text string) (err error) {
|
||
f, err := os.Create(filepath.Join(runhome, cachedFeatureSet(feature, supported)))
|
||
if f != nil {
|
||
if text != "" {
|
||
fmt.Fprintf(f, "%s", text)
|
||
}
|
||
f.Close()
|
||
}
|
||
return err
|
||
}
|
||
|
||
func SupportsNativeOverlay(home, runhome string) (bool, error) {
|
||
if os.Geteuid() != 0 || home == "" || runhome == "" {
|
||
return false, nil
|
||
}
|
||
|
||
var contents string
|
||
flagContent, err := os.ReadFile(getMountProgramFlagFile(home))
|
||
if err == nil {
|
||
contents = strings.TrimSpace(string(flagContent))
|
||
}
|
||
switch contents {
|
||
case "true":
|
||
logrus.Debugf("overlay: storage already configured with a mount-program")
|
||
return false, nil
|
||
case "false":
|
||
// Do nothing.
|
||
default:
|
||
needsMountProgram, err := scanForMountProgramIndicators(home)
|
||
if err != nil && !os.IsNotExist(err) {
|
||
return false, err
|
||
}
|
||
if err := os.WriteFile(getMountProgramFlagFile(home), []byte(fmt.Sprintf("%t", needsMountProgram)), 0o600); err != nil && !os.IsNotExist(err) {
|
||
return false, err
|
||
}
|
||
if needsMountProgram {
|
||
return false, nil
|
||
}
|
||
// fall through to check if we find ourselves needing to use a
|
||
// mount program now
|
||
}
|
||
|
||
for _, dir := range []string{home, runhome} {
|
||
if err := fileutils.Exists(dir); err != nil {
|
||
_ = idtools.MkdirAllAs(dir, 0o700, 0, 0)
|
||
}
|
||
}
|
||
|
||
fsMagic, err := graphdriver.GetFSMagic(home)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
|
||
supportsDType, _ := checkAndRecordOverlaySupport(fsMagic, home, runhome)
|
||
return supportsDType, nil
|
||
}
|
||
|
||
func supportsOverlay(home string, homeMagic graphdriver.FsMagic, rootUID, rootGID int) (supportsDType bool, err error) {
|
||
selinuxLabelTest := selinux.PrivContainerMountLabel()
|
||
|
||
logLevel := logrus.ErrorLevel
|
||
if unshare.IsRootless() {
|
||
logLevel = logrus.DebugLevel
|
||
}
|
||
|
||
layerDir, err := os.MkdirTemp(home, "compat")
|
||
if err != nil {
|
||
patherr, ok := err.(*os.PathError)
|
||
if ok && patherr.Err == syscall.ENOSPC {
|
||
return false, err
|
||
}
|
||
}
|
||
if err == nil {
|
||
// Check if reading the directory's contents populates the d_type field, which is required
|
||
// for proper operation of the overlay filesystem.
|
||
supportsDType, err = fsutils.SupportsDType(layerDir)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
if !supportsDType {
|
||
return false, overlayutils.ErrDTypeNotSupported("overlay", backingFs)
|
||
}
|
||
|
||
// Try a test mount in the specific location we're looking at using.
|
||
mergedDir := filepath.Join(layerDir, "merged")
|
||
mergedSubdir := filepath.Join(mergedDir, "subdir")
|
||
lower1Dir := filepath.Join(layerDir, "lower1")
|
||
lower2Dir := filepath.Join(layerDir, "lower2")
|
||
lower2Subdir := filepath.Join(lower2Dir, "subdir")
|
||
lower2SubdirFile := filepath.Join(lower2Subdir, "file")
|
||
upperDir := filepath.Join(layerDir, "upper")
|
||
workDir := filepath.Join(layerDir, "work")
|
||
defer func() {
|
||
// Permitted to fail, since the various subdirectories
|
||
// can be empty or not even there, and the home might
|
||
// legitimately be not empty
|
||
_ = unix.Unmount(mergedDir, unix.MNT_DETACH)
|
||
_ = os.RemoveAll(layerDir)
|
||
_ = os.Remove(home)
|
||
}()
|
||
_ = idtools.MkdirAs(mergedDir, 0o700, rootUID, rootGID)
|
||
_ = idtools.MkdirAs(lower1Dir, 0o700, rootUID, rootGID)
|
||
_ = idtools.MkdirAs(lower2Dir, 0o700, rootUID, rootGID)
|
||
_ = idtools.MkdirAs(lower2Subdir, 0o700, rootUID, rootGID)
|
||
_ = idtools.MkdirAs(upperDir, 0o700, rootUID, rootGID)
|
||
_ = idtools.MkdirAs(workDir, 0o700, rootUID, rootGID)
|
||
f, err := os.Create(lower2SubdirFile)
|
||
if err != nil {
|
||
logrus.Debugf("Unable to create test file: %v", err)
|
||
return supportsDType, fmt.Errorf("unable to create test file: %w", err)
|
||
}
|
||
f.Close()
|
||
flags := fmt.Sprintf("lowerdir=%s:%s,upperdir=%s,workdir=%s", lower1Dir, lower2Dir, upperDir, workDir)
|
||
if selinux.GetEnabled() &&
|
||
selinux.SecurityCheckContext(selinuxLabelTest) == nil {
|
||
// Linux 5.11 introduced unprivileged overlay mounts but it has an issue
|
||
// when used together with selinux labels.
|
||
// Check that overlay supports selinux labels as well.
|
||
flags = label.FormatMountLabel(flags, selinuxLabelTest)
|
||
}
|
||
if unshare.IsRootless() {
|
||
flags = fmt.Sprintf("%s,userxattr", flags)
|
||
}
|
||
if err := syscall.Mknod(filepath.Join(upperDir, "whiteout"), syscall.S_IFCHR|0o600, int(unix.Mkdev(0, 0))); err != nil {
|
||
logrus.Debugf("Unable to create kernel-style whiteout: %v", err)
|
||
return supportsDType, fmt.Errorf("unable to create kernel-style whiteout: %w", err)
|
||
}
|
||
|
||
if len(flags) < unix.Getpagesize() {
|
||
err := unix.Mount("overlay", mergedDir, "overlay", 0, flags)
|
||
if err == nil {
|
||
if err = os.RemoveAll(mergedSubdir); err != nil {
|
||
logrus.StandardLogger().Logf(logLevel, "overlay: removing an item from the merged directory failed: %v", err)
|
||
return supportsDType, fmt.Errorf("kernel returned %v when we tried to delete an item in the merged directory: %w", err, graphdriver.ErrNotSupported)
|
||
}
|
||
logrus.Debugf("overlay: test mount with multiple lowers succeeded")
|
||
return supportsDType, nil
|
||
}
|
||
logrus.Debugf("overlay: test mount with multiple lowers failed: %v", err)
|
||
}
|
||
flags = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower1Dir, upperDir, workDir)
|
||
if selinux.GetEnabled() {
|
||
flags = label.FormatMountLabel(flags, selinuxLabelTest)
|
||
}
|
||
if len(flags) < unix.Getpagesize() {
|
||
err := unix.Mount("overlay", mergedDir, "overlay", 0, flags)
|
||
if err == nil {
|
||
logrus.StandardLogger().Logf(logLevel, "overlay: test mount with multiple lowers failed, but succeeded with a single lower")
|
||
return supportsDType, fmt.Errorf("kernel too old to provide multiple lowers feature for overlay: %w", graphdriver.ErrNotSupported)
|
||
}
|
||
logrus.Debugf("overlay: test mount with a single lower failed: %v", err)
|
||
}
|
||
logrus.StandardLogger().Logf(logLevel, "'overlay' is not supported over %s at %q", backingFs, home)
|
||
return supportsDType, fmt.Errorf("'overlay' is not supported over %s at %q: %w", backingFs, home, graphdriver.ErrIncompatibleFS)
|
||
}
|
||
|
||
logrus.StandardLogger().Logf(logLevel, "'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.")
|
||
return supportsDType, fmt.Errorf("'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.: %w", graphdriver.ErrNotSupported)
|
||
}
|
||
|
||
func (d *Driver) useNaiveDiff() bool {
|
||
if d.usingComposefs {
|
||
return true
|
||
}
|
||
|
||
useNaiveDiffLock.Do(func() {
|
||
if d.options.mountProgram != "" {
|
||
useNaiveDiffOnly = true
|
||
return
|
||
}
|
||
feature := fmt.Sprintf("native-diff(%s)", d.options.mountOptions)
|
||
nativeDiffCacheResult, nativeDiffCacheText, err := cachedFeatureCheck(d.runhome, feature)
|
||
if err == nil {
|
||
if nativeDiffCacheResult {
|
||
logrus.Debugf("Cached value indicated that native-diff is usable")
|
||
} else {
|
||
logrus.Debugf("Cached value indicated that native-diff is not being used")
|
||
logrus.Info(nativeDiffCacheText)
|
||
}
|
||
useNaiveDiffOnly = !nativeDiffCacheResult
|
||
return
|
||
}
|
||
if err := doesSupportNativeDiff(d.home, d.options.mountOptions); err != nil {
|
||
nativeDiffCacheText = fmt.Sprintf("Not using native diff for overlay, this may cause degraded performance for building images: %v", err)
|
||
logrus.Info(nativeDiffCacheText)
|
||
useNaiveDiffOnly = true
|
||
}
|
||
if err := cachedFeatureRecord(d.runhome, feature, !useNaiveDiffOnly, nativeDiffCacheText); err != nil {
|
||
logrus.Warnf("Recording overlay native-diff support status: %v", err)
|
||
}
|
||
})
|
||
return useNaiveDiffOnly
|
||
}
|
||
|
||
func (d *Driver) String() string {
|
||
return d.name
|
||
}
|
||
|
||
// Status returns current driver information in a two dimensional string array.
|
||
// Output contains "Backing Filesystem" used in this implementation.
|
||
func (d *Driver) Status() [][2]string {
|
||
supportsVolatile, err := d.getSupportsVolatile()
|
||
if err != nil {
|
||
supportsVolatile = false
|
||
}
|
||
return [][2]string{
|
||
{"Backing Filesystem", backingFs},
|
||
{"Supports d_type", strconv.FormatBool(d.supportsDType)},
|
||
{"Native Overlay Diff", strconv.FormatBool(!d.useNaiveDiff())},
|
||
{"Using metacopy", strconv.FormatBool(d.usingMetacopy)},
|
||
{"Supports shifting", strconv.FormatBool(d.SupportsShifting())},
|
||
{"Supports volatile", strconv.FormatBool(supportsVolatile)},
|
||
}
|
||
}
|
||
|
||
// Metadata returns meta data about the overlay driver such as
|
||
// LowerDir, UpperDir, WorkDir and MergeDir used to store data.
|
||
func (d *Driver) Metadata(id string) (map[string]string, error) {
|
||
dir, _, inAdditionalStore := d.dir2(id, false)
|
||
if err := fileutils.Exists(dir); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
metadata := map[string]string{
|
||
"WorkDir": path.Join(dir, "work"),
|
||
"MergedDir": d.getMergedDir(id, dir, inAdditionalStore),
|
||
"UpperDir": path.Join(dir, "diff"),
|
||
}
|
||
|
||
lowerDirs, err := d.getLowerDirs(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if len(lowerDirs) > 0 {
|
||
metadata["LowerDir"] = strings.Join(lowerDirs, ":")
|
||
}
|
||
|
||
return metadata, nil
|
||
}
|
||
|
||
// Cleanup any state created by overlay which should be cleaned when
|
||
// the storage is being shutdown. The only state created by the driver
|
||
// is the bind mount on the home directory.
|
||
func (d *Driver) Cleanup() error {
|
||
anyPresent := d.pruneStagingDirectories()
|
||
if anyPresent {
|
||
return nil
|
||
}
|
||
return mount.Unmount(d.home)
|
||
}
|
||
|
||
// pruneStagingDirectories cleans up any staging directory that was leaked.
|
||
// It returns whether any staging directory is still present.
|
||
func (d *Driver) pruneStagingDirectories() bool {
|
||
d.stagingDirsLocksMutex.Lock()
|
||
for _, lock := range d.stagingDirsLocks {
|
||
lock.Unlock()
|
||
}
|
||
clear(d.stagingDirsLocks)
|
||
d.stagingDirsLocksMutex.Unlock()
|
||
|
||
anyPresent := false
|
||
|
||
stagingDirBase := filepath.Join(d.homeDirForImageStore(), stagingDir)
|
||
dirs, err := os.ReadDir(stagingDirBase)
|
||
if err == nil {
|
||
for _, dir := range dirs {
|
||
stagingDirToRemove := filepath.Join(stagingDirBase, dir.Name())
|
||
lock, err := lockfile.GetLockFile(filepath.Join(stagingDirToRemove, stagingLockFile))
|
||
if err != nil {
|
||
anyPresent = true
|
||
continue
|
||
}
|
||
if err := lock.TryLock(); err != nil {
|
||
anyPresent = true
|
||
continue
|
||
}
|
||
_ = os.RemoveAll(stagingDirToRemove)
|
||
lock.Unlock()
|
||
}
|
||
}
|
||
return anyPresent
|
||
}
|
||
|
||
// LookupAdditionalLayer looks up additional layer store by the specified
|
||
// TOC digest and ref and returns an object representing that layer.
|
||
// This API is experimental and can be changed without bumping the major version number.
|
||
// TODO: to remove the comment once it's no longer experimental.
|
||
func (d *Driver) LookupAdditionalLayer(tocDigest digest.Digest, ref string) (graphdriver.AdditionalLayer, error) {
|
||
l, err := d.getAdditionalLayerPath(tocDigest, ref)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
// Tell the additional layer store that we use this layer.
|
||
// This will increase reference counter on the store's side.
|
||
// This will be decreased on Release() method.
|
||
notifyUseAdditionalLayer(l)
|
||
return &additionalLayer{
|
||
path: l,
|
||
d: d,
|
||
}, nil
|
||
}
|
||
|
||
// LookupAdditionalLayerByID looks up additional layer store by the specified
|
||
// ID and returns an object representing that layer.
|
||
// This API is experimental and can be changed without bumping the major version number.
|
||
// TODO: to remove the comment once it's no longer experimental.
|
||
func (d *Driver) LookupAdditionalLayerByID(id string) (graphdriver.AdditionalLayer, error) {
|
||
l, err := d.getAdditionalLayerPathByID(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
// Tell the additional layer store that we use this layer.
|
||
// This will increase reference counter on the store's side.
|
||
// This will be decreased on Release() method.
|
||
notifyUseAdditionalLayer(l)
|
||
return &additionalLayer{
|
||
path: l,
|
||
d: d,
|
||
}, nil
|
||
}
|
||
|
||
// CreateFromTemplate creates a layer with the same contents and parent as another layer.
|
||
func (d *Driver) CreateFromTemplate(id, template string, templateIDMappings *idtools.IDMappings, parent string, parentIDMappings *idtools.IDMappings, opts *graphdriver.CreateOpts, readWrite bool) error {
|
||
if readWrite {
|
||
return d.CreateReadWrite(id, template, opts)
|
||
}
|
||
return d.Create(id, template, opts)
|
||
}
|
||
|
||
// CreateReadWrite creates a layer that is writable for use as a container
|
||
// file system.
|
||
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
|
||
if opts != nil && len(opts.StorageOpt) != 0 && !projectQuotaSupported {
|
||
return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
|
||
}
|
||
|
||
if opts == nil {
|
||
opts = &graphdriver.CreateOpts{
|
||
StorageOpt: map[string]string{},
|
||
}
|
||
}
|
||
|
||
if d.options.forceMask != nil && d.options.mountProgram == "" {
|
||
return fmt.Errorf("overlay: force_mask option for writeable layers is only supported with a mount_program")
|
||
}
|
||
|
||
if _, ok := opts.StorageOpt["size"]; !ok {
|
||
if opts.StorageOpt == nil {
|
||
opts.StorageOpt = map[string]string{}
|
||
}
|
||
opts.StorageOpt["size"] = strconv.FormatUint(d.options.quota.Size, 10)
|
||
}
|
||
|
||
if _, ok := opts.StorageOpt["inodes"]; !ok {
|
||
if opts.StorageOpt == nil {
|
||
opts.StorageOpt = map[string]string{}
|
||
}
|
||
opts.StorageOpt["inodes"] = strconv.FormatUint(d.options.quota.Inodes, 10)
|
||
}
|
||
|
||
return d.create(id, parent, opts, false)
|
||
}
|
||
|
||
// Create is used to create the upper, lower, and merge directories required for overlay fs for a given id.
|
||
// The parent filesystem is used to configure these directories for the overlay.
|
||
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) {
|
||
if opts != nil && len(opts.StorageOpt) != 0 {
|
||
if _, ok := opts.StorageOpt["size"]; ok {
|
||
return fmt.Errorf("--storage-opt size is only supported for ReadWrite Layers")
|
||
}
|
||
|
||
if _, ok := opts.StorageOpt["inodes"]; ok {
|
||
return fmt.Errorf("--storage-opt inodes is only supported for ReadWrite Layers")
|
||
}
|
||
}
|
||
|
||
return d.create(id, parent, opts, true)
|
||
}
|
||
|
||
func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnly bool) (retErr error) {
|
||
dir, homedir, _ := d.dir2(id, readOnly)
|
||
|
||
disableQuota := readOnly
|
||
|
||
var uidMaps []idtools.IDMap
|
||
var gidMaps []idtools.IDMap
|
||
|
||
if opts != nil && opts.IDMappings != nil {
|
||
uidMaps = opts.IDMappings.UIDs()
|
||
gidMaps = opts.IDMappings.GIDs()
|
||
}
|
||
|
||
// Make the link directory if it does not exist
|
||
if err := idtools.MkdirAllAs(path.Join(homedir, linkDir), 0o755, 0, 0); err != nil {
|
||
return err
|
||
}
|
||
|
||
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
idPair := idtools.IDPair{
|
||
UID: rootUID,
|
||
GID: rootGID,
|
||
}
|
||
|
||
if err := idtools.MkdirAllAndChownNew(path.Dir(dir), 0o755, idPair); err != nil {
|
||
return err
|
||
}
|
||
|
||
st := idtools.Stat{IDs: idPair, Mode: defaultPerms}
|
||
|
||
if parent != "" {
|
||
parentBase := d.dir(parent)
|
||
parentDiff := filepath.Join(parentBase, "diff")
|
||
if xSt, err := idtools.GetContainersOverrideXattr(parentDiff); err == nil {
|
||
st = xSt
|
||
} else {
|
||
systemSt, err := system.Stat(parentDiff)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
st.IDs.UID = int(systemSt.UID())
|
||
st.IDs.GID = int(systemSt.GID())
|
||
st.Mode = os.FileMode(systemSt.Mode())
|
||
}
|
||
}
|
||
|
||
if err := fileutils.Lexists(dir); err == nil {
|
||
logrus.Warnf("Trying to create a layer %#v while directory %q already exists; removing it first", id, dir)
|
||
// Don’t just os.RemoveAll(dir) here; d.Remove also removes the link in linkDir,
|
||
// so that we can’t end up with two symlinks in linkDir pointing to the same layer.
|
||
if err := d.Remove(id); err != nil {
|
||
return fmt.Errorf("removing a pre-existing layer directory %q: %w", dir, err)
|
||
}
|
||
}
|
||
|
||
if err := idtools.MkdirAllAndChownNew(dir, 0o700, idPair); err != nil {
|
||
return err
|
||
}
|
||
|
||
defer func() {
|
||
// Clean up on failure
|
||
if retErr != nil {
|
||
if err2 := os.RemoveAll(dir); err2 != nil {
|
||
logrus.Errorf("While recovering from a failure creating a layer, error deleting %#v: %v", dir, err2)
|
||
}
|
||
}
|
||
}()
|
||
|
||
if d.quotaCtl != nil && !disableQuota {
|
||
quota := quota.Quota{}
|
||
if opts != nil && len(opts.StorageOpt) > 0 {
|
||
driver := &Driver{}
|
||
if err := d.parseStorageOpt(opts.StorageOpt, driver); err != nil {
|
||
return err
|
||
}
|
||
if driver.options.quota.Size > 0 {
|
||
quota.Size = driver.options.quota.Size
|
||
}
|
||
if driver.options.quota.Inodes > 0 {
|
||
quota.Inodes = driver.options.quota.Inodes
|
||
}
|
||
}
|
||
// Set container disk quota limit
|
||
// If it is set to 0, we will track the disk usage, but not enforce a limit
|
||
if err := d.quotaCtl.SetQuota(dir, quota); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
forcedSt := st
|
||
if d.options.forceMask != nil {
|
||
forcedSt.IDs = idPair
|
||
forcedSt.Mode = *d.options.forceMask
|
||
}
|
||
|
||
diff := path.Join(dir, "diff")
|
||
if err := idtools.MkdirAs(diff, forcedSt.Mode, forcedSt.IDs.UID, forcedSt.IDs.GID); err != nil {
|
||
return err
|
||
}
|
||
|
||
if d.options.forceMask != nil {
|
||
st.Mode |= os.ModeDir
|
||
if err := idtools.SetContainersOverrideXattr(diff, st); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
lid := generateID(idLength)
|
||
|
||
linkBase := path.Join("..", id, "diff")
|
||
if err := os.Symlink(linkBase, path.Join(homedir, linkDir, lid)); err != nil {
|
||
return err
|
||
}
|
||
|
||
// Write link id to link file
|
||
if err := os.WriteFile(path.Join(dir, "link"), []byte(lid), 0o644); err != nil {
|
||
return err
|
||
}
|
||
|
||
if err := idtools.MkdirAs(path.Join(dir, "work"), 0o700, forcedSt.IDs.UID, forcedSt.IDs.GID); err != nil {
|
||
return err
|
||
}
|
||
if err := idtools.MkdirAs(path.Join(dir, "merged"), 0o700, forcedSt.IDs.UID, forcedSt.IDs.GID); err != nil {
|
||
return err
|
||
}
|
||
|
||
// if no parent directory, create a dummy lower directory and skip writing a "lowers" file
|
||
if parent == "" {
|
||
return idtools.MkdirAs(path.Join(dir, "empty"), 0o700, forcedSt.IDs.UID, forcedSt.IDs.GID)
|
||
}
|
||
|
||
lower, err := d.getLower(parent)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if lower != "" {
|
||
if err := os.WriteFile(path.Join(dir, lowerFile), []byte(lower), 0o666); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// Parse overlay storage options
|
||
func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
|
||
// Read size to set the disk project quota per container
|
||
for key, val := range storageOpt {
|
||
key := strings.ToLower(key)
|
||
switch key {
|
||
case "size":
|
||
size, err := units.RAMInBytes(val)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
driver.options.quota.Size = uint64(size)
|
||
case "inodes":
|
||
inodes, err := strconv.ParseUint(val, 10, 64)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
driver.options.quota.Inodes = inodes
|
||
default:
|
||
return fmt.Errorf("unknown option %s", key)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (d *Driver) getLower(parent string) (string, error) {
|
||
parentDir := d.dir(parent)
|
||
|
||
// Ensure parent exists
|
||
if err := fileutils.Lexists(parentDir); err != nil {
|
||
return "", err
|
||
}
|
||
|
||
// Read Parent link fileA
|
||
parentLink, err := os.ReadFile(path.Join(parentDir, "link"))
|
||
if err != nil {
|
||
if !os.IsNotExist(err) {
|
||
return "", err
|
||
}
|
||
logrus.Warnf("Can't read parent link %q because it does not exist. Going through storage to recreate the missing links.", path.Join(parentDir, "link"))
|
||
if err := d.recreateSymlinks(); err != nil {
|
||
return "", fmt.Errorf("recreating the links: %w", err)
|
||
}
|
||
parentLink, err = os.ReadFile(path.Join(parentDir, "link"))
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
}
|
||
lowers := []string{path.Join(linkDir, string(parentLink))}
|
||
|
||
parentLower, err := os.ReadFile(path.Join(parentDir, lowerFile))
|
||
if err == nil {
|
||
parentLowers := strings.Split(string(parentLower), ":")
|
||
lowers = append(lowers, parentLowers...)
|
||
}
|
||
return strings.Join(lowers, ":"), nil
|
||
}
|
||
|
||
func (d *Driver) dir(id string) string {
|
||
p, _, _ := d.dir2(id, false)
|
||
return p
|
||
}
|
||
|
||
func (d *Driver) getAllImageStores() []string {
|
||
additionalImageStores := d.AdditionalImageStores()
|
||
if d.imageStore != "" {
|
||
additionalImageStores = append([]string{d.imageStore}, additionalImageStores...)
|
||
}
|
||
return additionalImageStores
|
||
}
|
||
|
||
// homeDirForImageStore returns the home directory to use when an image store is configured
|
||
func (d *Driver) homeDirForImageStore() string {
|
||
if d.imageStore != "" {
|
||
return path.Join(d.imageStore, d.name)
|
||
}
|
||
// If there is not an image store configured, use the same
|
||
// store
|
||
return d.home
|
||
}
|
||
|
||
func (d *Driver) dir2(id string, useImageStore bool) (string, string, bool) {
|
||
homedir := d.home
|
||
if useImageStore {
|
||
homedir = d.homeDirForImageStore()
|
||
}
|
||
newpath := path.Join(homedir, id)
|
||
if err := fileutils.Exists(newpath); err != nil {
|
||
for _, p := range d.getAllImageStores() {
|
||
l := path.Join(p, d.name, id)
|
||
err = fileutils.Exists(l)
|
||
if err == nil {
|
||
return l, homedir, true
|
||
}
|
||
}
|
||
}
|
||
return newpath, homedir, false
|
||
}
|
||
|
||
func (d *Driver) getLowerDirs(id string) ([]string, error) {
|
||
var lowersArray []string
|
||
lowers, err := os.ReadFile(path.Join(d.dir(id), lowerFile))
|
||
if err == nil {
|
||
for _, s := range strings.Split(string(lowers), ":") {
|
||
lower := d.dir(s)
|
||
lp, err := os.Readlink(lower)
|
||
// if the link does not exist, we lost the symlinks during a sudden reboot.
|
||
// Let's go ahead and recreate those symlinks.
|
||
if err != nil {
|
||
if os.IsNotExist(err) {
|
||
logrus.Warnf("Can't read link %q because it does not exist. A storage corruption might have occurred, attempting to recreate the missing symlinks. It might be best wipe the storage to avoid further errors due to storage corruption.", lower)
|
||
if err := d.recreateSymlinks(); err != nil {
|
||
return nil, fmt.Errorf("recreating the missing symlinks: %w", err)
|
||
}
|
||
// let's call Readlink on lower again now that we have recreated the missing symlinks
|
||
lp, err = os.Readlink(lower)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
} else {
|
||
return nil, err
|
||
}
|
||
}
|
||
lowersArray = append(lowersArray, path.Clean(d.dir(path.Join("link", lp))))
|
||
}
|
||
} else if !os.IsNotExist(err) {
|
||
return nil, err
|
||
}
|
||
return lowersArray, nil
|
||
}
|
||
|
||
func (d *Driver) optsAppendMappings(opts string, uidMaps, gidMaps []idtools.IDMap) string {
|
||
if uidMaps != nil {
|
||
var uids, gids bytes.Buffer
|
||
if len(uidMaps) == 1 && uidMaps[0].Size == 1 {
|
||
uids.WriteString(fmt.Sprintf("squash_to_uid=%d", uidMaps[0].HostID))
|
||
} else {
|
||
uids.WriteString("uidmapping=")
|
||
for _, i := range uidMaps {
|
||
if uids.Len() > 0 {
|
||
uids.WriteString(":")
|
||
}
|
||
uids.WriteString(fmt.Sprintf("%d:%d:%d", i.ContainerID, i.HostID, i.Size))
|
||
}
|
||
}
|
||
if len(gidMaps) == 1 && gidMaps[0].Size == 1 {
|
||
gids.WriteString(fmt.Sprintf("squash_to_gid=%d", gidMaps[0].HostID))
|
||
} else {
|
||
gids.WriteString("gidmapping=")
|
||
for _, i := range gidMaps {
|
||
if gids.Len() > 0 {
|
||
gids.WriteString(":")
|
||
}
|
||
gids.WriteString(fmt.Sprintf("%d:%d:%d", i.ContainerID, i.HostID, i.Size))
|
||
}
|
||
}
|
||
return fmt.Sprintf("%s,%s,%s", opts, uids.String(), gids.String())
|
||
}
|
||
return opts
|
||
}
|
||
|
||
// Remove cleans the directories that are created for this id.
|
||
func (d *Driver) Remove(id string) error {
|
||
dir := d.dir(id)
|
||
lid, err := os.ReadFile(path.Join(dir, "link"))
|
||
if err == nil {
|
||
if err := os.RemoveAll(path.Join(d.home, linkDir, string(lid))); err != nil {
|
||
logrus.Debugf("Failed to remove link: %v", err)
|
||
}
|
||
}
|
||
|
||
d.releaseAdditionalLayerByID(id)
|
||
|
||
if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
||
return err
|
||
}
|
||
if d.quotaCtl != nil {
|
||
d.quotaCtl.ClearQuota(dir)
|
||
if d.imageStore != "" {
|
||
d.quotaCtl.ClearQuota(d.imageStore)
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// recreateSymlinks goes through the driver's home directory and checks if the diff directory
|
||
// under each layer has a symlink created for it under the linkDir. If the symlink does not
|
||
// exist, it creates them
|
||
func (d *Driver) recreateSymlinks() error {
|
||
// We have at most 3 corrective actions per layer, so 10 iterations is plenty.
|
||
const maxIterations = 10
|
||
|
||
// List all the directories under the home directory
|
||
dirs, err := os.ReadDir(d.home)
|
||
if err != nil {
|
||
return fmt.Errorf("reading driver home directory %q: %w", d.home, err)
|
||
}
|
||
// This makes the link directory if it doesn't exist
|
||
if err := idtools.MkdirAllAs(path.Join(d.home, linkDir), 0o755, 0, 0); err != nil {
|
||
return err
|
||
}
|
||
// Keep looping as long as we take some corrective action in each iteration
|
||
var errs error
|
||
madeProgress := true
|
||
iterations := 0
|
||
for madeProgress {
|
||
errs = nil
|
||
madeProgress = false
|
||
// Check that for each layer, there's a link in "l" with the name in
|
||
// the layer's "link" file that points to the layer's "diff" directory.
|
||
for _, dir := range dirs {
|
||
// Skip over the linkDir and anything that is not a directory
|
||
if dir.Name() == linkDir || !dir.IsDir() {
|
||
continue
|
||
}
|
||
// Read the "link" file under each layer to get the name of the symlink
|
||
data, err := os.ReadFile(path.Join(d.dir(dir.Name()), "link"))
|
||
if err != nil {
|
||
errs = errors.Join(errs, fmt.Errorf("reading name of symlink for %q: %w", dir.Name(), err))
|
||
continue
|
||
}
|
||
linkPath := path.Join(d.home, linkDir, strings.Trim(string(data), "\n"))
|
||
// Check if the symlink exists, and if it doesn't, create it again with the
|
||
// name we got from the "link" file
|
||
err = fileutils.Lexists(linkPath)
|
||
if err != nil && os.IsNotExist(err) {
|
||
if err := os.Symlink(path.Join("..", dir.Name(), "diff"), linkPath); err != nil {
|
||
errs = errors.Join(errs, err)
|
||
continue
|
||
}
|
||
madeProgress = true
|
||
} else if err != nil {
|
||
errs = errors.Join(errs, err)
|
||
continue
|
||
}
|
||
}
|
||
|
||
// linkDirFullPath is the full path to the linkDir
|
||
linkDirFullPath := filepath.Join(d.home, "l")
|
||
// Now check if we somehow lost a "link" file, by making sure
|
||
// that each symlink we have corresponds to one.
|
||
links, err := os.ReadDir(linkDirFullPath)
|
||
if err != nil {
|
||
errs = errors.Join(errs, err)
|
||
continue
|
||
}
|
||
// Go through all of the symlinks in the "l" directory
|
||
for _, link := range links {
|
||
// Read the symlink's target, which should be "../$layer/diff"
|
||
target, err := os.Readlink(filepath.Join(linkDirFullPath, link.Name()))
|
||
if err != nil {
|
||
errs = errors.Join(errs, err)
|
||
continue
|
||
}
|
||
targetComponents := strings.Split(target, string(os.PathSeparator))
|
||
if len(targetComponents) != 3 || targetComponents[0] != ".." || targetComponents[2] != "diff" {
|
||
errs = errors.Join(errs, fmt.Errorf("link target of %q looks weird: %q", link, target))
|
||
// force the link to be recreated on the next pass
|
||
if err := os.Remove(filepath.Join(linkDirFullPath, link.Name())); err != nil {
|
||
if !os.IsNotExist(err) {
|
||
errs = errors.Join(errs, fmt.Errorf("removing link %q: %w", link, err))
|
||
} // else don’t report any error, but also don’t set madeProgress.
|
||
continue
|
||
}
|
||
madeProgress = true
|
||
continue
|
||
}
|
||
// Reconstruct the name of the target's link file and check that
|
||
// it has the basename of our symlink in it.
|
||
targetID := targetComponents[1]
|
||
linkFile := filepath.Join(d.dir(targetID), "link")
|
||
data, err := os.ReadFile(linkFile)
|
||
if err != nil || string(data) != link.Name() {
|
||
// NOTE: If two or more links point to the same target, we will update linkFile
|
||
// with every value of link.Name(), and set madeProgress = true every time.
|
||
if err := os.WriteFile(linkFile, []byte(link.Name()), 0o644); err != nil {
|
||
errs = errors.Join(errs, fmt.Errorf("correcting link for layer %s: %w", targetID, err))
|
||
continue
|
||
}
|
||
madeProgress = true
|
||
}
|
||
}
|
||
iterations++
|
||
if iterations >= maxIterations {
|
||
errs = errors.Join(errs, fmt.Errorf("reached %d iterations in overlay graph driver’s recreateSymlink, giving up", iterations))
|
||
break
|
||
}
|
||
}
|
||
return errs
|
||
}
|
||
|
||
// Get creates and mounts the required file system for the given id and returns the mount path.
|
||
func (d *Driver) Get(id string, options graphdriver.MountOpts) (string, error) {
|
||
return d.get(id, false, options)
|
||
}
|
||
|
||
func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountOpts) (_ string, retErr error) {
|
||
dir, _, inAdditionalStore := d.dir2(id, false)
|
||
if err := fileutils.Exists(dir); err != nil {
|
||
return "", err
|
||
}
|
||
if _, err := redirectDiffIfAdditionalLayer(path.Join(dir, "diff"), true); err != nil {
|
||
return "", err
|
||
}
|
||
|
||
// user namespace requires this to move a directory from lower to upper.
|
||
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UidMaps, options.GidMaps)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
mergedDir := d.getMergedDir(id, dir, inAdditionalStore)
|
||
// Attempt to create the merged dir if it doesn't exist, but don't chown an already existing directory (it might be in an additional store)
|
||
if err := idtools.MkdirAllAndChownNew(mergedDir, 0o700, idtools.IDPair{UID: rootUID, GID: rootGID}); err != nil && !os.IsExist(err) {
|
||
return "", err
|
||
}
|
||
|
||
if count := d.ctr.Increment(mergedDir); count > 1 {
|
||
return mergedDir, nil
|
||
}
|
||
defer func() {
|
||
if retErr != nil {
|
||
if c := d.ctr.Decrement(mergedDir); c <= 0 {
|
||
if mntErr := unix.Unmount(mergedDir, 0); mntErr != nil {
|
||
// Ignore EINVAL, it means the directory is not a mount point and it can happen
|
||
// if the current function fails before the mount point is created.
|
||
if !errors.Is(mntErr, unix.EINVAL) {
|
||
logrus.Errorf("Unmounting %v: %v", mergedDir, mntErr)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}()
|
||
|
||
readWrite := !inAdditionalStore
|
||
|
||
if !d.SupportsShifting() || options.DisableShifting {
|
||
disableShifting = true
|
||
}
|
||
|
||
logLevel := logrus.WarnLevel
|
||
if unshare.IsRootless() {
|
||
logLevel = logrus.DebugLevel
|
||
}
|
||
optsList := options.Options
|
||
|
||
needsIDMapping := !disableShifting && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 && d.options.mountProgram == ""
|
||
|
||
if len(optsList) == 0 {
|
||
if d.options.mountOptions != "" {
|
||
optsList = strings.Split(d.options.mountOptions, ",")
|
||
}
|
||
} else {
|
||
// If metacopy=on is present in d.options.mountOptions it must be present in the mount
|
||
// options otherwise the kernel refuses to follow the metacopy xattr.
|
||
if hasMetacopyOption(strings.Split(d.options.mountOptions, ",")) && !hasMetacopyOption(options.Options) {
|
||
if d.usingMetacopy {
|
||
logrus.StandardLogger().Logf(logrus.DebugLevel, "Adding metacopy option, configured globally")
|
||
optsList = append(optsList, "metacopy=on")
|
||
}
|
||
}
|
||
}
|
||
if !d.usingMetacopy {
|
||
if hasMetacopyOption(optsList) {
|
||
if d.options.mountProgram == "" {
|
||
release := ""
|
||
var uts unix.Utsname
|
||
if err := unix.Uname(&uts); err == nil {
|
||
release = " " + string(uts.Release[:]) + " " + string(uts.Version[:])
|
||
}
|
||
logrus.StandardLogger().Logf(logLevel, "Ignoring global metacopy option, not supported with booted kernel %s", release)
|
||
} else {
|
||
logrus.Debugf("Ignoring global metacopy option, the mount program doesn't support it")
|
||
}
|
||
}
|
||
optsList = slices.DeleteFunc(optsList, func(opt string) bool {
|
||
return opt == "metacopy=on"
|
||
})
|
||
}
|
||
|
||
if slices.Contains(optsList, "ro") {
|
||
readWrite = false
|
||
}
|
||
|
||
lowers, err := os.ReadFile(path.Join(dir, lowerFile))
|
||
if err != nil && !os.IsNotExist(err) {
|
||
return "", err
|
||
}
|
||
splitLowers := strings.Split(string(lowers), ":")
|
||
if len(splitLowers) > maxDepth {
|
||
return "", errors.New("max depth exceeded")
|
||
}
|
||
|
||
// absLowers is the list of lowers as absolute paths.
|
||
absLowers := []string{}
|
||
|
||
diffN := 1
|
||
perms := defaultPerms
|
||
if d.options.forceMask != nil {
|
||
perms = *d.options.forceMask
|
||
}
|
||
permsKnown := false
|
||
st, err := os.Stat(filepath.Join(dir, nameWithSuffix("diff", diffN)))
|
||
if err == nil {
|
||
perms = st.Mode()
|
||
permsKnown = true
|
||
}
|
||
for err == nil {
|
||
absLowers = append(absLowers, filepath.Join(dir, nameWithSuffix("diff", diffN)))
|
||
diffN++
|
||
err = fileutils.Exists(filepath.Join(dir, nameWithSuffix("diff", diffN)))
|
||
}
|
||
|
||
idmappedMountProcessPid := -1
|
||
if needsIDMapping {
|
||
pid, cleanupFunc, err := idmap.CreateUsernsProcess(options.UidMaps, options.GidMaps)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
idmappedMountProcessPid = pid
|
||
defer cleanupFunc()
|
||
}
|
||
|
||
skipIDMappingLayers := make(map[string]string)
|
||
|
||
composefsMounts := []string{}
|
||
defer func() {
|
||
for _, m := range composefsMounts {
|
||
defer func(m string) {
|
||
if err := unix.Unmount(m, unix.MNT_DETACH); err != nil {
|
||
logrus.Warnf("Unmount %q: %v", m, err)
|
||
}
|
||
}(m)
|
||
}
|
||
}()
|
||
|
||
composeFsLayers := []string{}
|
||
maybeAddComposefsMount := func(lowerID string, i int, readWrite bool) (string, error) {
|
||
composefsBlob := d.getComposefsData(lowerID)
|
||
if err := fileutils.Exists(composefsBlob); err != nil {
|
||
if os.IsNotExist(err) {
|
||
return "", nil
|
||
}
|
||
return "", err
|
||
}
|
||
logrus.Debugf("overlay: using composefs blob %s for lower %s", composefsBlob, lowerID)
|
||
|
||
if readWrite && i == 0 {
|
||
return "", fmt.Errorf("cannot mount a composefs layer as writeable")
|
||
}
|
||
|
||
dest := d.getStorePrivateDirectory(id, dir, fmt.Sprintf("composefs-layers/%d", i), inAdditionalStore)
|
||
if err := os.MkdirAll(dest, 0o700); err != nil {
|
||
return "", err
|
||
}
|
||
|
||
if err := mountComposefsBlob(composefsBlob, dest); err != nil {
|
||
return "", err
|
||
}
|
||
composefsMounts = append(composefsMounts, dest)
|
||
composeFsPath, err := d.getDiffPath(lowerID)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
composeFsLayers = append(composeFsLayers, composeFsPath)
|
||
skipIDMappingLayers[composeFsPath] = composeFsPath
|
||
return dest, nil
|
||
}
|
||
|
||
diffDir := path.Join(dir, "diff")
|
||
|
||
if dest, err := maybeAddComposefsMount(id, 0, readWrite); err != nil {
|
||
return "", err
|
||
} else if dest != "" {
|
||
diffDir = dest
|
||
}
|
||
|
||
// For each lower, resolve its path, and append it and any additional diffN
|
||
// directories to the lowers list.
|
||
for i, l := range splitLowers {
|
||
if l == "" {
|
||
continue
|
||
}
|
||
|
||
lower := ""
|
||
newpath := path.Join(d.home, l)
|
||
if st, err := os.Stat(newpath); err != nil {
|
||
for _, p := range d.getAllImageStores() {
|
||
lower = path.Join(p, d.name, l)
|
||
if st2, err2 := os.Stat(lower); err2 == nil {
|
||
if !permsKnown {
|
||
perms = st2.Mode()
|
||
permsKnown = true
|
||
}
|
||
break
|
||
}
|
||
lower = ""
|
||
}
|
||
// if it is a "not found" error, that means the symlinks were lost in a sudden reboot
|
||
// so call the recreateSymlinks function to go through all the layer dirs and recreate
|
||
// the symlinks with the name from their respective "link" files
|
||
if lower == "" && os.IsNotExist(err) {
|
||
logrus.Warnf("Can't stat lower layer %q because it does not exist. Going through storage to recreate the missing symlinks.", newpath)
|
||
if err := d.recreateSymlinks(); err != nil {
|
||
return "", fmt.Errorf("recreating the missing symlinks: %w", err)
|
||
}
|
||
lower = newpath
|
||
} else if lower == "" {
|
||
return "", fmt.Errorf("can't stat lower layer %q: %w", newpath, err)
|
||
}
|
||
} else {
|
||
if !permsKnown {
|
||
perms = st.Mode()
|
||
permsKnown = true
|
||
}
|
||
lower = newpath
|
||
}
|
||
|
||
linkContent, err := os.Readlink(lower)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
lowerID := filepath.Base(filepath.Dir(linkContent))
|
||
composefsMount, err := maybeAddComposefsMount(lowerID, i+1, readWrite)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if composefsMount != "" {
|
||
if needsIDMapping {
|
||
if err := idmap.CreateIDMappedMount(composefsMount, composefsMount, idmappedMountProcessPid); err != nil {
|
||
return "", fmt.Errorf("create mapped mount for %q: %w", composefsMount, err)
|
||
}
|
||
skipIDMappingLayers[composefsMount] = composefsMount
|
||
// overlay takes a reference on the mount, so it is safe to unmount
|
||
// the mapped idmounts as soon as the final overlay file system is mounted.
|
||
defer func() {
|
||
if err := unix.Unmount(composefsMount, unix.MNT_DETACH); err != nil {
|
||
logrus.Warnf("Unmount %q: %v", composefsMount, err)
|
||
}
|
||
}()
|
||
}
|
||
absLowers = append(absLowers, composefsMount)
|
||
continue
|
||
}
|
||
|
||
absLowers = append(absLowers, lower)
|
||
diffN = 1
|
||
err = fileutils.Exists(dumbJoin(lower, "..", nameWithSuffix("diff", diffN)))
|
||
for err == nil {
|
||
absLowers = append(absLowers, dumbJoin(lower, "..", nameWithSuffix("diff", diffN)))
|
||
diffN++
|
||
err = fileutils.Exists(dumbJoin(lower, "..", nameWithSuffix("diff", diffN)))
|
||
}
|
||
}
|
||
|
||
if len(composeFsLayers) > 0 {
|
||
optsList = append(optsList, "metacopy=on", "redirect_dir=on")
|
||
}
|
||
|
||
if len(absLowers) == 0 {
|
||
absLowers = append(absLowers, path.Join(dir, "empty"))
|
||
}
|
||
|
||
if err := idtools.MkdirAllAs(diffDir, perms, rootUID, rootGID); err != nil {
|
||
if !inAdditionalStore {
|
||
return "", err
|
||
}
|
||
// if it is in an additional store, do not fail if the directory already exists
|
||
if err2 := fileutils.Exists(diffDir); err2 != nil {
|
||
return "", err
|
||
}
|
||
}
|
||
|
||
workdir := path.Join(dir, "work")
|
||
|
||
if d.options.mountProgram == "" && unshare.IsRootless() {
|
||
optsList = append(optsList, "userxattr")
|
||
}
|
||
|
||
if options.Volatile && !slices.Contains(optsList, "volatile") {
|
||
supported, err := d.getSupportsVolatile()
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
// If "volatile" is not supported by the file system, just ignore the request
|
||
if supported {
|
||
optsList = append(optsList, "volatile")
|
||
}
|
||
}
|
||
|
||
if needsIDMapping {
|
||
var newAbsDir []string
|
||
idMappedMounts := make(map[string]string)
|
||
|
||
mappedRoot := filepath.Join(d.home, id, "mapped")
|
||
if err := os.MkdirAll(mappedRoot, 0o700); err != nil {
|
||
return "", err
|
||
}
|
||
|
||
// rewrite the lower dirs to their idmapped mount.
|
||
c := 0
|
||
for _, absLower := range absLowers {
|
||
mappedMountSrc := getMappedMountRoot(absLower)
|
||
|
||
if _, ok := skipIDMappingLayers[absLower]; ok {
|
||
newAbsDir = append(newAbsDir, absLower)
|
||
continue
|
||
}
|
||
|
||
root, found := idMappedMounts[mappedMountSrc]
|
||
if !found {
|
||
root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c))
|
||
c++
|
||
if err := idmap.CreateIDMappedMount(mappedMountSrc, root, idmappedMountProcessPid); err != nil {
|
||
return "", fmt.Errorf("create mapped mount for %q on %q: %w", mappedMountSrc, root, err)
|
||
}
|
||
idMappedMounts[mappedMountSrc] = root
|
||
|
||
// overlay takes a reference on the mount, so it is safe to unmount
|
||
// the mapped idmounts as soon as the final overlay file system is mounted.
|
||
defer func() {
|
||
if err := unix.Unmount(root, unix.MNT_DETACH); err != nil {
|
||
logrus.Warnf("Unmount %q: %v", root, err)
|
||
}
|
||
}()
|
||
}
|
||
|
||
// relative path to the layer through the id mapped mount
|
||
rel, err := filepath.Rel(mappedMountSrc, absLower)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
newAbsDir = append(newAbsDir, filepath.Join(root, rel))
|
||
}
|
||
absLowers = newAbsDir
|
||
}
|
||
|
||
lowerDirs := strings.Join(absLowers, ":")
|
||
if len(composeFsLayers) > 0 {
|
||
sep := "::"
|
||
supportsDataOnly, err := d.getSupportsDataOnly()
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if !supportsDataOnly {
|
||
sep = ":"
|
||
}
|
||
composeFsLayersLowerDirs := strings.Join(composeFsLayers, sep)
|
||
lowerDirs = lowerDirs + sep + composeFsLayersLowerDirs
|
||
}
|
||
// absLowers is not valid anymore now as we have added composeFsLayers to it, so prevent
|
||
// its usage.
|
||
absLowers = nil //nolint:ineffassign
|
||
|
||
var opts string
|
||
if readWrite {
|
||
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDirs, diffDir, workdir)
|
||
} else {
|
||
opts = fmt.Sprintf("lowerdir=%s:%s", diffDir, lowerDirs)
|
||
}
|
||
if len(optsList) > 0 {
|
||
opts = fmt.Sprintf("%s,%s", opts, strings.Join(optsList, ","))
|
||
}
|
||
|
||
mountData := label.FormatMountLabel(opts, options.MountLabel)
|
||
mountFunc := unix.Mount
|
||
mountTarget := mergedDir
|
||
|
||
pageSize := unix.Getpagesize()
|
||
|
||
if d.options.mountProgram != "" {
|
||
mountFunc = func(source string, target string, mType string, flags uintptr, label string) error {
|
||
if !disableShifting {
|
||
label = d.optsAppendMappings(label, options.UidMaps, options.GidMaps)
|
||
}
|
||
|
||
// if forceMask is in place, tell fuse-overlayfs to write the permissions mask to an unprivileged xattr as well.
|
||
if d.options.forceMask != nil {
|
||
label = label + ",xattr_permissions=2"
|
||
}
|
||
|
||
mountProgram := exec.Command(d.options.mountProgram, "-o", label, target)
|
||
mountProgram.Dir = d.home
|
||
var b bytes.Buffer
|
||
mountProgram.Stderr = &b
|
||
err := mountProgram.Run()
|
||
if err != nil {
|
||
output := b.String()
|
||
if output == "" {
|
||
output = "<stderr empty>"
|
||
}
|
||
return fmt.Errorf("using mount program %s: %s: %w", d.options.mountProgram, output, err)
|
||
}
|
||
return nil
|
||
}
|
||
} else if len(mountData) >= pageSize {
|
||
// Use mountFrom when the mount data has exceeded the page size. The mount syscall fails if
|
||
// the mount data cannot fit within a page and relative links make the mount data much
|
||
// smaller at the expense of requiring a fork exec to chdir().
|
||
if readWrite {
|
||
diffDir := path.Join(id, "diff")
|
||
workDir := path.Join(id, "work")
|
||
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDirs, diffDir, workDir)
|
||
} else {
|
||
opts = fmt.Sprintf("lowerdir=%s:%s", diffDir, lowerDirs)
|
||
}
|
||
if len(optsList) > 0 {
|
||
opts = strings.Join(append([]string{opts}, optsList...), ",")
|
||
}
|
||
mountData = label.FormatMountLabel(opts, options.MountLabel)
|
||
mountFunc = func(source string, target string, mType string, flags uintptr, label string) error {
|
||
return mountOverlayFrom(d.home, source, target, mType, flags, label)
|
||
}
|
||
if !inAdditionalStore {
|
||
mountTarget = path.Join(id, "merged")
|
||
}
|
||
}
|
||
|
||
// overlay has a check in place to prevent mounting the same file system twice
|
||
// if volatile was already specified. Yes, the kernel repeats the "work" component.
|
||
err = os.RemoveAll(filepath.Join(workdir, "work", "incompat", "volatile"))
|
||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||
return "", err
|
||
}
|
||
|
||
flags, data := mount.ParseOptions(mountData)
|
||
logrus.Debugf("overlay: mount_data=%s", mountData)
|
||
if err := mountFunc("overlay", mountTarget, "overlay", uintptr(flags), data); err != nil {
|
||
return "", fmt.Errorf("creating overlay mount to %s, mount_data=%q: %w", mountTarget, mountData, err)
|
||
}
|
||
|
||
return mergedDir, nil
|
||
}
|
||
|
||
// getStorePrivateDirectory returns a directory path for storing data that requires exclusive access.
|
||
// If 'inAdditionalStore' is true, the path will be under the rundir, otherwise it will be placed in
|
||
// the primary store.
|
||
func (d *Driver) getStorePrivateDirectory(id, layerDir, subdir string, inAdditionalStore bool) string {
|
||
if inAdditionalStore {
|
||
return path.Join(d.runhome, id, subdir)
|
||
}
|
||
return path.Join(layerDir, subdir)
|
||
}
|
||
|
||
// getMergedDir returns the directory path that should be used as the mount point for the overlayfs.
|
||
func (d *Driver) getMergedDir(id, dir string, inAdditionalStore bool) string {
|
||
// Ordinarily, .Get() (layer mounting) callers are supposed to guarantee exclusion.
|
||
//
|
||
// But additional stores are initialized with RO locks and don’t support a write
|
||
// lock operation at all; and naiveDiff operations cause mounts/unmounts, so they might
|
||
// happen on code paths where we might only holding a RO lock for the additional store.
|
||
// To prevent races with other processes mounting or unmounting the layer,
|
||
// use a private directory under the main store rundir, not the "merged" directory inside the
|
||
// original layer store holding the layer data.
|
||
//
|
||
// To support this, contrary to the _general_ locking rules for .Diff / .Changes (which allow a RO lock),
|
||
// the top-level Store implementation uses an exclusive lock for the primary layer store;
|
||
// and since the rundir cannot be shared for different stores, it is safe to assume the
|
||
// current process has exclusive access to it.
|
||
//
|
||
// TO DO: LOCKING BUG: the .DiffSize operation does not currently hold an exclusive lock on the primary store.
|
||
// (_Some_ of the callers might be better ported to use a metadata-only size computation instead of DiffSize,
|
||
// but DiffSize probably needs to remain for computing sizes of container’s RW layers.)
|
||
return d.getStorePrivateDirectory(id, dir, "merged", inAdditionalStore)
|
||
}
|
||
|
||
// Put unmounts the mount path created for the give id.
|
||
func (d *Driver) Put(id string) error {
|
||
dir, _, inAdditionalStore := d.dir2(id, false)
|
||
if err := fileutils.Exists(dir); err != nil {
|
||
return err
|
||
}
|
||
mountpoint := d.getMergedDir(id, dir, inAdditionalStore)
|
||
|
||
if count := d.ctr.Decrement(mountpoint); count > 0 {
|
||
return nil
|
||
}
|
||
if err := fileutils.Exists(path.Join(dir, lowerFile)); err != nil && !os.IsNotExist(err) {
|
||
return err
|
||
}
|
||
|
||
unmounted := false
|
||
|
||
mappedRoot := filepath.Join(d.home, id, "mapped")
|
||
// It should not happen, but cleanup any mapped mount if it was leaked.
|
||
if err := fileutils.Exists(mappedRoot); err == nil {
|
||
mounts, err := os.ReadDir(mappedRoot)
|
||
if err == nil {
|
||
// Go through all of the mapped mounts.
|
||
for _, m := range mounts {
|
||
_ = unix.Unmount(filepath.Join(mappedRoot, m.Name()), unix.MNT_DETACH)
|
||
}
|
||
}
|
||
}
|
||
|
||
if d.options.mountProgram != "" {
|
||
// Attempt to unmount the FUSE mount using either fusermount or fusermount3.
|
||
// If they fail, fallback to unix.Unmount
|
||
for _, v := range []string{"fusermount3", "fusermount"} {
|
||
err := exec.Command(v, "-u", mountpoint).Run()
|
||
if err != nil && !errors.Is(err, exec.ErrNotFound) {
|
||
logrus.Debugf("Error unmounting %s with %s - %v", mountpoint, v, err)
|
||
}
|
||
if err == nil {
|
||
unmounted = true
|
||
break
|
||
}
|
||
}
|
||
// If fusermount|fusermount3 failed to unmount the FUSE file system, make sure all
|
||
// pending changes are propagated to the file system
|
||
if !unmounted {
|
||
fd, err := unix.Open(mountpoint, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||
if err == nil {
|
||
if err := unix.Syncfs(fd); err != nil {
|
||
logrus.Debugf("Error Syncfs(%s) - %v", mountpoint, err)
|
||
}
|
||
unix.Close(fd)
|
||
}
|
||
}
|
||
}
|
||
|
||
if !unmounted {
|
||
if err := unix.Unmount(mountpoint, unix.MNT_DETACH); err != nil && !os.IsNotExist(err) {
|
||
logrus.Debugf("Failed to unmount %s overlay: %s - %v", id, mountpoint, err)
|
||
if !errors.Is(err, unix.EINVAL) {
|
||
return fmt.Errorf("unmounting %q: %w", mountpoint, err)
|
||
}
|
||
}
|
||
}
|
||
|
||
if inAdditionalStore {
|
||
// check the base name for extra safety
|
||
if strings.HasPrefix(mountpoint, d.runhome) && filepath.Base(mountpoint) == "merged" {
|
||
err := os.RemoveAll(filepath.Dir(mountpoint))
|
||
if err != nil {
|
||
logrus.Warningf("Failed to remove mountpoint %s overlay: %s: %v", id, mountpoint, err)
|
||
}
|
||
}
|
||
} else {
|
||
uid, gid := int(0), int(0)
|
||
fi, err := os.Stat(mountpoint)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if stat, ok := fi.Sys().(*syscall.Stat_t); ok {
|
||
uid, gid = int(stat.Uid), int(stat.Gid)
|
||
}
|
||
|
||
tmpMountpoint := path.Join(dir, "merged.1")
|
||
if err := idtools.MkdirAs(tmpMountpoint, 0o700, uid, gid); err != nil && !errors.Is(err, os.ErrExist) {
|
||
return err
|
||
}
|
||
// rename(2) can be used on an empty directory, as it is the mountpoint after umount, and it retains
|
||
// its atomic semantic. In this way the "merged" directory is never removed.
|
||
if err := unix.Rename(tmpMountpoint, mountpoint); err != nil {
|
||
logrus.Debugf("Failed to replace mountpoint %s overlay: %s: %v", id, mountpoint, err)
|
||
return fmt.Errorf("replacing mount point %q: %w", mountpoint, err)
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// Exists checks to see if the id is already mounted.
|
||
func (d *Driver) Exists(id string) bool {
|
||
err := fileutils.Exists(d.dir(id))
|
||
return err == nil
|
||
}
|
||
|
||
// List layers (not including additional image stores)
|
||
func (d *Driver) ListLayers() ([]string, error) {
|
||
entries, err := os.ReadDir(d.home)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
layers := make([]string, 0)
|
||
|
||
for _, entry := range entries {
|
||
id := entry.Name()
|
||
switch id {
|
||
case linkDir, stagingDir, quota.BackingFsBlockDeviceLink, mountProgramFlagFile:
|
||
// expected, but not a layer. skip it
|
||
continue
|
||
default:
|
||
// Does it look like a datadir directory?
|
||
if !entry.IsDir() {
|
||
continue
|
||
}
|
||
layers = append(layers, id)
|
||
}
|
||
}
|
||
return layers, nil
|
||
}
|
||
|
||
// isParent returns if the passed in parent is the direct parent of the passed in layer
|
||
func (d *Driver) isParent(id, parent string) bool {
|
||
lowers, err := d.getLowerDirs(id)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
if parent == "" && len(lowers) > 0 {
|
||
return false
|
||
}
|
||
|
||
parentDir := d.dir(parent)
|
||
var ld string
|
||
if len(lowers) > 0 {
|
||
ld = filepath.Dir(lowers[0])
|
||
}
|
||
if ld == "" && parent == "" {
|
||
return true
|
||
}
|
||
return ld == parentDir
|
||
}
|
||
|
||
func (d *Driver) getWhiteoutFormat() archive.WhiteoutFormat {
|
||
whiteoutFormat := archive.OverlayWhiteoutFormat
|
||
if d.options.mountProgram != "" {
|
||
// If we are using a mount program, we are most likely running
|
||
// as an unprivileged user that cannot use mknod, so fallback to the
|
||
// AUFS whiteout format.
|
||
whiteoutFormat = archive.AUFSWhiteoutFormat
|
||
}
|
||
return whiteoutFormat
|
||
}
|
||
|
||
type overlayFileGetter struct {
|
||
diffDirs []string
|
||
composefsMounts map[string]*os.File // map from diff dir to the directory with the composefs blob mounted
|
||
}
|
||
|
||
func (g *overlayFileGetter) Get(path string) (io.ReadCloser, error) {
|
||
buf := make([]byte, unix.PathMax)
|
||
for _, d := range g.diffDirs {
|
||
if f, found := g.composefsMounts[d]; found {
|
||
// there is no *at equivalent for getxattr, but it can be emulated by opening the file under /proc/self/fd/$FD/$PATH
|
||
len, err := unix.Getxattr(fmt.Sprintf("/proc/self/fd/%d/%s", int(f.Fd()), path), "trusted.overlay.redirect", buf)
|
||
if err != nil {
|
||
if errors.Is(err, unix.ENODATA) {
|
||
continue
|
||
}
|
||
return nil, &fs.PathError{Op: "getxattr", Path: path, Err: err}
|
||
}
|
||
|
||
// the xattr value is the path to the file in the composefs layer diff directory
|
||
return os.Open(filepath.Join(d, string(buf[:len])))
|
||
}
|
||
|
||
f, err := os.Open(filepath.Join(d, path))
|
||
if err == nil {
|
||
return f, nil
|
||
}
|
||
}
|
||
if len(g.diffDirs) > 0 {
|
||
return os.Open(filepath.Join(g.diffDirs[0], path))
|
||
}
|
||
return nil, fmt.Errorf("%s: %w", path, os.ErrNotExist)
|
||
}
|
||
|
||
func (g *overlayFileGetter) Close() (errs error) {
|
||
for _, f := range g.composefsMounts {
|
||
if err := f.Close(); err != nil {
|
||
errs = errors.Join(errs, err)
|
||
}
|
||
if err := unix.Rmdir(f.Name()); err != nil {
|
||
errs = errors.Join(errs, err)
|
||
}
|
||
}
|
||
return errs
|
||
}
|
||
|
||
// newStagingDir creates a new staging directory and returns the path to it.
|
||
func (d *Driver) newStagingDir() (string, error) {
|
||
stagingDirBase := filepath.Join(d.homeDirForImageStore(), stagingDir)
|
||
err := os.MkdirAll(stagingDirBase, 0o700)
|
||
if err != nil && !os.IsExist(err) {
|
||
return "", err
|
||
}
|
||
return os.MkdirTemp(stagingDirBase, "")
|
||
}
|
||
|
||
// DiffGetter returns a FileGetCloser that can read files from the directory that
|
||
// contains files for the layer differences, either for this layer, or one of our
|
||
// lowers if we're just a template directory. Used for direct access for tar-split.
|
||
func (d *Driver) DiffGetter(id string) (_ graphdriver.FileGetCloser, Err error) {
|
||
p, err := d.getDiffPath(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
paths, err := d.getLowerDiffPaths(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// map from diff dir to the directory with the composefs blob mounted
|
||
composefsMounts := make(map[string]*os.File)
|
||
defer func() {
|
||
if Err != nil {
|
||
for _, f := range composefsMounts {
|
||
f.Close()
|
||
if err := unix.Rmdir(f.Name()); err != nil && !os.IsNotExist(err) {
|
||
logrus.Warnf("Failed to remove %s: %v", f.Name(), err)
|
||
}
|
||
}
|
||
}
|
||
}()
|
||
diffDirs := append([]string{p}, paths...)
|
||
for _, diffDir := range diffDirs {
|
||
// diffDir has the form $GRAPH_ROOT/overlay/$ID/diff, so grab the $ID from the parent directory
|
||
id := path.Base(path.Dir(diffDir))
|
||
composefsData := d.getComposefsData(id)
|
||
if fileutils.Exists(composefsData) != nil {
|
||
// not a composefs layer, ignore it
|
||
continue
|
||
}
|
||
fd, err := openComposefsMount(composefsData)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
composefsMounts[diffDir] = os.NewFile(uintptr(fd), composefsData)
|
||
}
|
||
return &overlayFileGetter{diffDirs: diffDirs, composefsMounts: composefsMounts}, nil
|
||
}
|
||
|
||
// CleanupStagingDirectory cleanups the staging directory.
|
||
func (d *Driver) CleanupStagingDirectory(stagingDirectory string) error {
|
||
parentStagingDir := filepath.Dir(stagingDirectory)
|
||
|
||
d.stagingDirsLocksMutex.Lock()
|
||
if lock, ok := d.stagingDirsLocks[parentStagingDir]; ok {
|
||
delete(d.stagingDirsLocks, parentStagingDir)
|
||
lock.Unlock()
|
||
}
|
||
d.stagingDirsLocksMutex.Unlock()
|
||
|
||
return os.RemoveAll(parentStagingDir)
|
||
}
|
||
|
||
func supportsDataOnlyLayersCached(home, runhome string) (bool, error) {
|
||
feature := "dataonly-layers"
|
||
overlayCacheResult, _, err := cachedFeatureCheck(runhome, feature)
|
||
if err == nil {
|
||
if overlayCacheResult {
|
||
logrus.Debugf("Cached value indicated that data-only layers for overlay are supported")
|
||
return true, nil
|
||
}
|
||
logrus.Debugf("Cached value indicated that data-only layers for overlay are not supported")
|
||
return false, nil
|
||
}
|
||
supportsDataOnly, err := supportsDataOnlyLayers(home)
|
||
if err2 := cachedFeatureRecord(runhome, feature, supportsDataOnly, ""); err2 != nil {
|
||
return false, fmt.Errorf("recording overlay data-only layers support status: %w", err2)
|
||
}
|
||
return supportsDataOnly, err
|
||
}
|
||
|
||
// ApplyDiffWithDiffer applies the changes in the new layer using the specified function
|
||
func (d *Driver) ApplyDiffWithDiffer(options *graphdriver.ApplyDiffWithDifferOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, errRet error) {
|
||
var idMappings *idtools.IDMappings
|
||
var forceMask *os.FileMode
|
||
|
||
if options != nil {
|
||
idMappings = options.Mappings
|
||
forceMask = options.ForceMask
|
||
}
|
||
if d.options.forceMask != nil {
|
||
forceMask = d.options.forceMask
|
||
}
|
||
|
||
if idMappings == nil {
|
||
idMappings = &idtools.IDMappings{}
|
||
}
|
||
|
||
layerDir, err := d.newStagingDir()
|
||
if err != nil {
|
||
return graphdriver.DriverWithDifferOutput{}, err
|
||
}
|
||
perms := defaultPerms
|
||
if forceMask != nil {
|
||
perms = *forceMask
|
||
}
|
||
applyDir := filepath.Join(layerDir, "dir")
|
||
if err := os.Mkdir(applyDir, perms); err != nil {
|
||
return graphdriver.DriverWithDifferOutput{}, err
|
||
}
|
||
|
||
lock, err := lockfile.GetLockFile(filepath.Join(layerDir, stagingLockFile))
|
||
if err != nil {
|
||
return graphdriver.DriverWithDifferOutput{}, err
|
||
}
|
||
defer func() {
|
||
if errRet != nil {
|
||
d.stagingDirsLocksMutex.Lock()
|
||
delete(d.stagingDirsLocks, layerDir)
|
||
d.stagingDirsLocksMutex.Unlock()
|
||
lock.Unlock()
|
||
}
|
||
}()
|
||
d.stagingDirsLocksMutex.Lock()
|
||
d.stagingDirsLocks[layerDir] = lock
|
||
d.stagingDirsLocksMutex.Unlock()
|
||
lock.Lock()
|
||
|
||
logrus.Debugf("Applying differ in %s", applyDir)
|
||
|
||
differOptions := graphdriver.DifferOptions{
|
||
Format: graphdriver.DifferOutputFormatDir,
|
||
}
|
||
if d.usingComposefs {
|
||
differOptions.Format = graphdriver.DifferOutputFormatFlat
|
||
differOptions.UseFsVerity = graphdriver.DifferFsVerityIfAvailable
|
||
}
|
||
out, err := differ.ApplyDiff(applyDir, &archive.TarOptions{
|
||
UIDMaps: idMappings.UIDs(),
|
||
GIDMaps: idMappings.GIDs(),
|
||
IgnoreChownErrors: d.options.ignoreChownErrors,
|
||
WhiteoutFormat: d.getWhiteoutFormat(),
|
||
InUserNS: unshare.IsRootless(),
|
||
ForceMask: forceMask,
|
||
}, &differOptions)
|
||
|
||
out.Target = applyDir
|
||
|
||
return out, err
|
||
}
|
||
|
||
// ApplyDiffFromStagingDirectory applies the changes using the specified staging directory.
|
||
func (d *Driver) ApplyDiffFromStagingDirectory(id, parent string, diffOutput *graphdriver.DriverWithDifferOutput, options *graphdriver.ApplyDiffWithDifferOpts) error {
|
||
stagingDirectory := diffOutput.Target
|
||
parentStagingDir := filepath.Dir(stagingDirectory)
|
||
|
||
defer func() {
|
||
d.stagingDirsLocksMutex.Lock()
|
||
if lock, ok := d.stagingDirsLocks[parentStagingDir]; ok {
|
||
delete(d.stagingDirsLocks, parentStagingDir)
|
||
lock.Unlock()
|
||
}
|
||
d.stagingDirsLocksMutex.Unlock()
|
||
}()
|
||
|
||
diffPath, err := d.getDiffPath(id)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
// If the current layer doesn't set the mode for the parent, override it with the parent layer's mode.
|
||
if d.options.forceMask == nil && diffOutput.RootDirMode == nil && parent != "" {
|
||
parentDiffPath, err := d.getDiffPath(parent)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
parentSt, err := os.Stat(parentDiffPath)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if err := os.Chmod(stagingDirectory, parentSt.Mode()); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
if d.usingComposefs {
|
||
toc := diffOutput.Artifacts[tocArtifact]
|
||
verityDigests := diffOutput.Artifacts[fsVerityDigestsArtifact].(map[string]string)
|
||
if err := generateComposeFsBlob(verityDigests, toc, d.getComposefsData(id)); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
if err := os.RemoveAll(diffPath); err != nil && !os.IsNotExist(err) {
|
||
return err
|
||
}
|
||
|
||
return os.Rename(stagingDirectory, diffPath)
|
||
}
|
||
|
||
// DifferTarget gets the location where files are stored for the layer.
|
||
func (d *Driver) DifferTarget(id string) (string, error) {
|
||
return d.getDiffPath(id)
|
||
}
|
||
|
||
// ApplyDiff applies the new layer into a root
|
||
func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) (size int64, err error) {
|
||
if !d.isParent(id, parent) {
|
||
if d.options.ignoreChownErrors {
|
||
options.IgnoreChownErrors = d.options.ignoreChownErrors
|
||
}
|
||
if d.options.forceMask != nil {
|
||
options.ForceMask = d.options.forceMask
|
||
}
|
||
return d.naiveDiff.ApplyDiff(id, parent, options)
|
||
}
|
||
|
||
idMappings := options.Mappings
|
||
if idMappings == nil {
|
||
idMappings = &idtools.IDMappings{}
|
||
}
|
||
|
||
applyDir, err := d.getDiffPath(id)
|
||
if err != nil {
|
||
return 0, err
|
||
}
|
||
|
||
logrus.Debugf("Applying tar in %s", applyDir)
|
||
// Overlay doesn't need the parent id to apply the diff
|
||
if err := untar(options.Diff, applyDir, &archive.TarOptions{
|
||
UIDMaps: idMappings.UIDs(),
|
||
GIDMaps: idMappings.GIDs(),
|
||
IgnoreChownErrors: d.options.ignoreChownErrors,
|
||
ForceMask: d.options.forceMask,
|
||
WhiteoutFormat: d.getWhiteoutFormat(),
|
||
InUserNS: unshare.IsRootless(),
|
||
}); err != nil {
|
||
return 0, err
|
||
}
|
||
|
||
return directory.Size(applyDir)
|
||
}
|
||
|
||
func (d *Driver) getComposefsData(id string) string {
|
||
dir := d.dir(id)
|
||
return path.Join(dir, "composefs-data")
|
||
}
|
||
|
||
func (d *Driver) getDiffPath(id string) (string, error) {
|
||
dir := d.dir(id)
|
||
return redirectDiffIfAdditionalLayer(path.Join(dir, "diff"), false)
|
||
}
|
||
|
||
func (d *Driver) getLowerDiffPaths(id string) ([]string, error) {
|
||
layers, err := d.getLowerDirs(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
for i, l := range layers {
|
||
layers[i], err = redirectDiffIfAdditionalLayer(l, false)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
return layers, nil
|
||
}
|
||
|
||
// DiffSize calculates the changes between the specified id
|
||
// and its parent and returns the size in bytes of the changes
|
||
// relative to its base filesystem directory.
|
||
func (d *Driver) DiffSize(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) (size int64, err error) {
|
||
if !d.isParent(id, parent) {
|
||
return d.naiveDiff.DiffSize(id, idMappings, parent, parentMappings, mountLabel)
|
||
}
|
||
|
||
p, err := d.getDiffPath(id)
|
||
if err != nil {
|
||
return 0, err
|
||
}
|
||
return directory.Size(p)
|
||
}
|
||
|
||
// Diff produces an archive of the changes between the specified
|
||
// layer and its parent layer which may be "".
|
||
func (d *Driver) Diff(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) (io.ReadCloser, error) {
|
||
if d.useNaiveDiff() || !d.isParent(id, parent) {
|
||
return d.naiveDiff.Diff(id, idMappings, parent, parentMappings, mountLabel)
|
||
}
|
||
|
||
if idMappings == nil {
|
||
idMappings = &idtools.IDMappings{}
|
||
}
|
||
|
||
lowerDirs, err := d.getLowerDiffPaths(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
diffPath, err := d.getDiffPath(id)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
logrus.Debugf("Tar with options on %s", diffPath)
|
||
return archive.TarWithOptions(diffPath, &archive.TarOptions{
|
||
Compression: archive.Uncompressed,
|
||
UIDMaps: idMappings.UIDs(),
|
||
GIDMaps: idMappings.GIDs(),
|
||
WhiteoutFormat: d.getWhiteoutFormat(),
|
||
WhiteoutData: lowerDirs,
|
||
})
|
||
}
|
||
|
||
// Changes produces a list of changes between the specified layer
|
||
// and its parent layer. If parent is "", then all changes will be ADD changes.
|
||
func (d *Driver) Changes(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) ([]archive.Change, error) {
|
||
if d.useNaiveDiff() || !d.isParent(id, parent) {
|
||
return d.naiveDiff.Changes(id, idMappings, parent, parentMappings, mountLabel)
|
||
}
|
||
// Overlay doesn't have snapshots, so we need to get changes from all parent
|
||
// layers.
|
||
diffPath, err := d.getDiffPath(id)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get diff path: %w", err)
|
||
}
|
||
layers, err := d.getLowerDiffPaths(id)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get lower diff path: %w", err)
|
||
}
|
||
|
||
c, err := archive.OverlayChanges(layers, diffPath)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("computing changes: %w", err)
|
||
}
|
||
return c, nil
|
||
}
|
||
|
||
// AdditionalImageStores returns additional image stores supported by the driver
|
||
func (d *Driver) AdditionalImageStores() []string {
|
||
return d.options.imageStores
|
||
}
|
||
|
||
// UpdateLayerIDMap updates ID mappings in a from matching the ones specified
|
||
// by toContainer to those specified by toHost.
|
||
func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMappings, mountLabel string) error {
|
||
var err error
|
||
dir := d.dir(id)
|
||
diffDir := filepath.Join(dir, "diff")
|
||
|
||
rootUID, rootGID := 0, 0
|
||
if toHost != nil {
|
||
rootUID, rootGID, err = idtools.GetRootUIDGID(toHost.UIDs(), toHost.GIDs())
|
||
if err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
// Mount the new layer and handle ownership changes and possible copy_ups in it.
|
||
options := graphdriver.MountOpts{
|
||
MountLabel: mountLabel,
|
||
Options: strings.Split(d.options.mountOptions, ","),
|
||
}
|
||
layerFs, err := d.get(id, true, options)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
err = graphdriver.ChownPathByMaps(layerFs, toContainer, toHost)
|
||
if err != nil {
|
||
if err2 := d.Put(id); err2 != nil {
|
||
logrus.Errorf("%v; unmounting %v: %v", err, id, err2)
|
||
}
|
||
return err
|
||
}
|
||
if err = d.Put(id); err != nil {
|
||
return err
|
||
}
|
||
|
||
// Rotate the diff directories.
|
||
i := 0
|
||
perms := defaultPerms
|
||
st, err := os.Stat(nameWithSuffix(diffDir, i))
|
||
if d.options.forceMask != nil {
|
||
perms = *d.options.forceMask
|
||
} else {
|
||
if err == nil {
|
||
perms = st.Mode()
|
||
}
|
||
}
|
||
for err == nil {
|
||
i++
|
||
err = fileutils.Exists(nameWithSuffix(diffDir, i))
|
||
}
|
||
|
||
for i > 0 {
|
||
err = os.Rename(nameWithSuffix(diffDir, i-1), nameWithSuffix(diffDir, i))
|
||
if err != nil {
|
||
return err
|
||
}
|
||
i--
|
||
}
|
||
|
||
// We need to re-create the work directory as it might keep a reference
|
||
// to the old upper layer in the index.
|
||
workDir := filepath.Join(dir, "work")
|
||
if err := os.RemoveAll(workDir); err == nil {
|
||
if err := idtools.MkdirAs(workDir, defaultPerms, rootUID, rootGID); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
// Re-create the directory that we're going to use as the upper layer.
|
||
if err := idtools.MkdirAs(diffDir, perms, rootUID, rootGID); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// supportsIDmappedMounts returns whether the kernel supports using idmapped mounts with
|
||
// overlay lower layers.
|
||
func (d *Driver) supportsIDmappedMounts() bool {
|
||
if d.supportsIDMappedMounts != nil {
|
||
return *d.supportsIDMappedMounts
|
||
}
|
||
|
||
supportsIDMappedMounts, err := checkAndRecordIDMappedSupport(d.home, d.runhome)
|
||
d.supportsIDMappedMounts = &supportsIDMappedMounts
|
||
if err == nil {
|
||
return supportsIDMappedMounts
|
||
}
|
||
logrus.Debugf("Check for idmapped mounts support %v", err)
|
||
return false
|
||
}
|
||
|
||
// SupportsShifting tells whether the driver support shifting of the UIDs/GIDs in an userNS
|
||
func (d *Driver) SupportsShifting() bool {
|
||
if os.Getenv("_CONTAINERS_OVERLAY_DISABLE_IDMAP") == "yes" {
|
||
return false
|
||
}
|
||
if d.options.mountProgram != "" {
|
||
return true
|
||
}
|
||
return d.supportsIDmappedMounts()
|
||
}
|
||
|
||
// dumbJoin is more or less a dumber version of filepath.Join, but one which
|
||
// won't Clean() the path, allowing us to append ".." as a component and trust
|
||
// pathname resolution to do some non-obvious work.
|
||
func dumbJoin(names ...string) string {
|
||
if len(names) == 0 {
|
||
return string(os.PathSeparator)
|
||
}
|
||
return strings.Join(names, string(os.PathSeparator))
|
||
}
|
||
|
||
func nameWithSuffix(name string, number int) string {
|
||
if number == 0 {
|
||
return name
|
||
}
|
||
return fmt.Sprintf("%s%d", name, number)
|
||
}
|
||
|
||
func validateOneAdditionalLayerPath(target string) error {
|
||
for _, p := range []string{
|
||
filepath.Join(target, "diff"),
|
||
filepath.Join(target, "info"),
|
||
filepath.Join(target, "blob"),
|
||
} {
|
||
if err := fileutils.Exists(p); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (d *Driver) getAdditionalLayerPath(tocDigest digest.Digest, ref string) (string, error) {
|
||
refElem := base64.StdEncoding.EncodeToString([]byte(ref))
|
||
for _, ls := range d.options.layerStores {
|
||
ref := ""
|
||
if ls.withReference {
|
||
ref = refElem
|
||
}
|
||
target := path.Join(ls.path, ref, tocDigest.String())
|
||
err := validateOneAdditionalLayerPath(target)
|
||
if err == nil {
|
||
return target, nil
|
||
}
|
||
logrus.Debugf("additional Layer Store %v failed to stat additional layer: %v", ls, err)
|
||
}
|
||
|
||
return "", fmt.Errorf("additional layer (%q, %q) not found: %w", tocDigest, ref, graphdriver.ErrLayerUnknown)
|
||
}
|
||
|
||
func (d *Driver) releaseAdditionalLayerByID(id string) {
|
||
if al, err := d.getAdditionalLayerPathByID(id); err == nil {
|
||
notifyReleaseAdditionalLayer(al)
|
||
} else if !os.IsNotExist(err) {
|
||
logrus.Warnf("Unexpected error on reading Additional Layer Store pointer %v", err)
|
||
}
|
||
}
|
||
|
||
// additionalLayer represents a layer in Additional Layer Store.
|
||
type additionalLayer struct {
|
||
path string
|
||
d *Driver
|
||
releaseOnce sync.Once
|
||
}
|
||
|
||
// Info returns arbitrary information stored along with this layer (i.e. `info` file).
|
||
// This API is experimental and can be changed without bumping the major version number.
|
||
// TODO: to remove the comment once it's no longer experimental.
|
||
func (al *additionalLayer) Info() (io.ReadCloser, error) {
|
||
return os.Open(filepath.Join(al.path, "info"))
|
||
}
|
||
|
||
// Blob returns a reader of the raw contents of this layer.
|
||
func (al *additionalLayer) Blob() (io.ReadCloser, error) {
|
||
return os.Open(filepath.Join(al.path, "blob"))
|
||
}
|
||
|
||
// CreateAs creates a new layer from this additional layer.
|
||
// This API is experimental and can be changed without bumping the major version number.
|
||
// TODO: to remove the comment once it's no longer experimental.
|
||
func (al *additionalLayer) CreateAs(id, parent string) error {
|
||
// TODO: support opts
|
||
if err := al.d.Create(id, parent, nil); err != nil {
|
||
return err
|
||
}
|
||
dir := al.d.dir(id)
|
||
diffDir := path.Join(dir, "diff")
|
||
if err := os.RemoveAll(diffDir); err != nil {
|
||
return err
|
||
}
|
||
// tell the additional layer store that we use this layer.
|
||
// mark this layer as "additional layer"
|
||
if err := os.WriteFile(path.Join(dir, "additionallayer"), []byte(al.path), 0o644); err != nil {
|
||
return err
|
||
}
|
||
notifyUseAdditionalLayer(al.path)
|
||
return os.Symlink(filepath.Join(al.path, "diff"), diffDir)
|
||
}
|
||
|
||
func (d *Driver) getAdditionalLayerPathByID(id string) (string, error) {
|
||
al, err := os.ReadFile(path.Join(d.dir(id), "additionallayer"))
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return string(al), nil
|
||
}
|
||
|
||
// Release tells the additional layer store that we don't use this handler.
|
||
// This API is experimental and can be changed without bumping the major version number.
|
||
// TODO: to remove the comment once it's no longer experimental.
|
||
func (al *additionalLayer) Release() {
|
||
// Tell the additional layer store that we don't use this layer handler.
|
||
// This will decrease the reference counter on the store's side, which was
|
||
// increased in LookupAdditionalLayer (so this must be called only once).
|
||
al.releaseOnce.Do(func() {
|
||
notifyReleaseAdditionalLayer(al.path)
|
||
})
|
||
}
|
||
|
||
// notifyUseAdditionalLayer notifies Additional Layer Store that we use the specified layer.
|
||
// This is done by creating "use" file in the layer directory. This is useful for
|
||
// Additional Layer Store to consider when to perform GC. Notification-aware Additional
|
||
// Layer Store must return ENOENT.
|
||
func notifyUseAdditionalLayer(al string) {
|
||
if !path.IsAbs(al) {
|
||
logrus.Warnf("additionallayer must be absolute (got: %v)", al)
|
||
return
|
||
}
|
||
useFile := path.Join(al, "use")
|
||
f, err := os.Create(useFile)
|
||
if os.IsNotExist(err) {
|
||
return
|
||
} else if err == nil {
|
||
f.Close()
|
||
if err := os.Remove(useFile); err != nil {
|
||
logrus.Warnf("Failed to remove use file")
|
||
}
|
||
}
|
||
logrus.Warnf("Unexpected error by Additional Layer Store %v during use; GC doesn't seem to be supported", err)
|
||
}
|
||
|
||
// notifyReleaseAdditionalLayer notifies Additional Layer Store that we don't use the specified
|
||
// layer anymore. This is done by rmdir-ing the layer directory. This is useful for
|
||
// Additional Layer Store to consider when to perform GC. Notification-aware Additional
|
||
// Layer Store must return ENOENT.
|
||
func notifyReleaseAdditionalLayer(al string) {
|
||
if !path.IsAbs(al) {
|
||
logrus.Warnf("additionallayer must be absolute (got: %v)", al)
|
||
return
|
||
}
|
||
// tell the additional layer store that we don't use this layer anymore.
|
||
err := unix.Rmdir(al)
|
||
if os.IsNotExist(err) {
|
||
return
|
||
}
|
||
logrus.Warnf("Unexpected error by Additional Layer Store %v during release; GC doesn't seem to be supported", err)
|
||
}
|
||
|
||
// redirectDiffIfAdditionalLayer checks if the passed diff path is Additional Layer and
|
||
// returns the redirected path. If the passed diff is not the one in Additional Layer
|
||
// Store, it returns the original path without changes.
|
||
func redirectDiffIfAdditionalLayer(diffPath string, checkExistence bool) (string, error) {
|
||
if ld, err := os.Readlink(diffPath); err == nil {
|
||
// diff is the link to Additional Layer Store
|
||
if !path.IsAbs(ld) {
|
||
return "", fmt.Errorf("linkpath must be absolute (got: %q)", ld)
|
||
}
|
||
if checkExistence {
|
||
if err := fileutils.Exists(ld); err != nil {
|
||
return "", fmt.Errorf("failed to access to the linked additional layer: %w", err)
|
||
}
|
||
}
|
||
diffPath = ld
|
||
} else if err.(*os.PathError).Err != syscall.EINVAL {
|
||
return "", err
|
||
}
|
||
return diffPath, nil
|
||
}
|
||
|
||
// getMappedMountRoot is a heuristic that calculates the parent directory where
|
||
// the idmapped mount should be applied.
|
||
// It is useful to minimize the number of idmapped mounts and at the same time use
|
||
// a common path as long as possible to reduce the length of the mount data argument.
|
||
func getMappedMountRoot(path string) string {
|
||
dirName := filepath.Dir(path)
|
||
if filepath.Base(dirName) == linkDir {
|
||
return filepath.Dir(dirName)
|
||
}
|
||
return dirName
|
||
}
|
||
|
||
// Dedup performs deduplication of the driver's storage.
|
||
func (d *Driver) Dedup(req graphdriver.DedupArgs) (graphdriver.DedupResult, error) {
|
||
var dirs []string
|
||
for _, layer := range req.Layers {
|
||
dir, _, inAdditionalStore := d.dir2(layer, false)
|
||
if inAdditionalStore {
|
||
continue
|
||
}
|
||
if err := fileutils.Exists(dir); err == nil {
|
||
dirs = append(dirs, filepath.Join(dir, "diff"))
|
||
}
|
||
}
|
||
r, err := dedup.DedupDirs(dirs, req.Options)
|
||
if err != nil {
|
||
return graphdriver.DedupResult{}, err
|
||
}
|
||
return graphdriver.DedupResult{Deduped: r.Deduped}, nil
|
||
}
|