From 70c193132dc84d85964d8eee0e383dffd9723863 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Fri, 22 Mar 2019 16:45:27 -0700 Subject: [PATCH 1/3] mounts: Add check for system volumes We handle system directories differently, if its a bind mount we mount the guest system directory to the container mount and skip the 9p share mount. However, we should not do this for docker volumes which are directories created by Docker. This introduces a Docker specific check, but that is the only information available to us at the OCI layer. Signed-off-by: Archana Shinde --- virtcontainers/container.go | 6 +++++- virtcontainers/mount.go | 16 ++++++++++++++++ virtcontainers/mount_test.go | 10 ++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 337d07dec7..75fcdc98f2 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -477,7 +477,11 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) ( var sharedDirMounts []Mount var ignoredMounts []Mount for idx, m := range c.mounts { - if isSystemMount(m.Destination) || m.Type != "bind" { + if isSystemMount(m.Destination) && !IsDockerVolume(m.Source) { + continue + } + + if m.Type != "bind" { continue } diff --git a/virtcontainers/mount.go b/virtcontainers/mount.go index 5ffa7c5713..6dc6b62f38 100644 --- a/virtcontainers/mount.go +++ b/virtcontainers/mount.go @@ -326,3 +326,19 @@ func bindUnmountAllRootfs(ctx context.Context, sharedDir string, sandbox *Sandbo } } } + +const ( + dockerVolumePrefix = "/var/lib/docker/volumes" + dockerVolumeSuffix = "_data" +) + +// IsDockerVolume returns true if the given source path is +// a docker volume. +// This uses a very specific path that is used by docker. +func IsDockerVolume(path string) bool { + if strings.HasPrefix(path, dockerVolumePrefix) && filepath.Base(path) == dockerVolumeSuffix { + return true + } + return false +} + diff --git a/virtcontainers/mount_test.go b/virtcontainers/mount_test.go index 820fd7447c..f39fa3c489 100644 --- a/virtcontainers/mount_test.go +++ b/virtcontainers/mount_test.go @@ -282,3 +282,13 @@ func TestIsDeviceMapper(t *testing.T) { t.Fatal() } } + +func TestIsDockerVolume(t *testing.T) { + path := "/var/lib/docker/volumes/00da1347c7cf4f15db35f/_data" + isDockerVolume := IsDockerVolume(path) + assert.True(t, isDockerVolume) + + path = "/var/lib/testdir" + isDockerVolume := IsDockerVolume(path) + assert.False(t, isDockerVolume) +} From 228d1512d9273b3513e53d6305c598dbad9d7dd0 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Mon, 25 Mar 2019 10:44:13 -0700 Subject: [PATCH 2/3] mount: Add check for k8s host empty directory k8s host empty-dir is equivalent to docker volumes. For this case, we should just use the host directory even for system directories. Move the isEphemeral function to virtcontainers to not introduce cyclic dependency. Fixes #1417 Signed-off-by: Archana Shinde --- pkg/katautils/create.go | 2 +- pkg/katautils/create_test.go | 2 +- pkg/katautils/utils.go | 28 -------------------- pkg/katautils/utils_test.go | 31 ---------------------- virtcontainers/container.go | 6 +++-- virtcontainers/mount.go | 50 ++++++++++++++++++++++++++++++++++++ virtcontainers/mount_test.go | 42 +++++++++++++++++++++++++++++- 7 files changed, 97 insertions(+), 64 deletions(-) diff --git a/pkg/katautils/create.go b/pkg/katautils/create.go index 63fab5c859..630b32c991 100644 --- a/pkg/katautils/create.go +++ b/pkg/katautils/create.go @@ -161,7 +161,7 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon // of the same pod the already existing volume is reused. func SetEphemeralStorageType(ociSpec oci.CompatOCISpec) oci.CompatOCISpec { for idx, mnt := range ociSpec.Mounts { - if IsEphemeralStorage(mnt.Source) { + if vc.IsEphemeralStorage(mnt.Source) { ociSpec.Mounts[idx].Type = "ephemeral" } } diff --git a/pkg/katautils/create_test.go b/pkg/katautils/create_test.go index 3caa54d3e2..9a5c7d5f3c 100644 --- a/pkg/katautils/create_test.go +++ b/pkg/katautils/create_test.go @@ -190,7 +190,7 @@ func TestSetEphemeralStorageType(t *testing.T) { } defer os.RemoveAll(dir) - ephePath := filepath.Join(dir, k8sEmptyDir, "tmp-volume") + ephePath := filepath.Join(dir, vc.K8sEmptyDir, "tmp-volume") err = os.MkdirAll(ephePath, testDirMode) assert.Nil(err) diff --git a/pkg/katautils/utils.go b/pkg/katautils/utils.go index ae3ccecd36..12ab107fad 100644 --- a/pkg/katautils/utils.go +++ b/pkg/katautils/utils.go @@ -14,12 +14,6 @@ import ( "path/filepath" "strings" "syscall" - - vc "github.com/kata-containers/runtime/virtcontainers" -) - -const ( - k8sEmptyDir = "kubernetes.io~empty-dir" ) // FileExists test is a file exiting or not @@ -31,28 +25,6 @@ func FileExists(path string) bool { return true } -// IsEphemeralStorage returns true if the given path -// to the storage belongs to kubernetes ephemeral storage -// -// This method depends on a specific path used by k8s -// to detect if it's of type ephemeral. As of now, -// this is a very k8s specific solution that works -// but in future there should be a better way for this -// method to determine if the path is for ephemeral -// volume type -func IsEphemeralStorage(path string) bool { - splitSourceSlice := strings.Split(path, "/") - if len(splitSourceSlice) > 1 { - storageType := splitSourceSlice[len(splitSourceSlice)-2] - if storageType == k8sEmptyDir { - if _, fsType, _ := vc.GetDevicePathAndFsType(path); fsType == "tmpfs" { - return true - } - } - } - return false -} - // ResolvePath returns the fully resolved and expanded value of the // specified path. func ResolvePath(path string) (string, error) { diff --git a/pkg/katautils/utils_test.go b/pkg/katautils/utils_test.go index 4a1d7a5f11..af2238894e 100644 --- a/pkg/katautils/utils_test.go +++ b/pkg/katautils/utils_test.go @@ -366,34 +366,3 @@ func TestGetFileContents(t *testing.T) { assert.Equal(t, contents, d.contents) } } - -func TestIsEphemeralStorage(t *testing.T) { - if os.Geteuid() != 0 { - t.Skip(testDisabledNeedRoot) - } - - dir, err := ioutil.TempDir(testDir, "foo") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(dir) - - sampleEphePath := filepath.Join(dir, k8sEmptyDir, "tmp-volume") - err = os.MkdirAll(sampleEphePath, testDirMode) - assert.Nil(t, err) - - err = syscall.Mount("tmpfs", sampleEphePath, "tmpfs", 0, "") - assert.Nil(t, err) - defer syscall.Unmount(sampleEphePath, 0) - - isEphe := IsEphemeralStorage(sampleEphePath) - if !isEphe { - t.Fatalf("Unable to correctly determine volume type") - } - - sampleEphePath = "/var/lib/kubelet/pods/366c3a75-4869-11e8-b479-507b9ddd5ce4/volumes/cache-volume" - isEphe = IsEphemeralStorage(sampleEphePath) - if isEphe { - t.Fatalf("Unable to correctly determine volume type") - } -} diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 75fcdc98f2..f77662beda 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -477,8 +477,10 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) ( var sharedDirMounts []Mount var ignoredMounts []Mount for idx, m := range c.mounts { - if isSystemMount(m.Destination) && !IsDockerVolume(m.Source) { - continue + if isSystemMount(m.Destination) { + if !(IsDockerVolume(m.Source) || Isk8sHostEmptyDir(m.Source)) { + continue + } } if m.Type != "bind" { diff --git a/virtcontainers/mount.go b/virtcontainers/mount.go index 6dc6b62f38..c5f71cc211 100644 --- a/virtcontainers/mount.go +++ b/virtcontainers/mount.go @@ -342,3 +342,53 @@ func IsDockerVolume(path string) bool { return false } +const ( + // K8sEmptyDir is the k8s specific path for `empty-dir` volumes + K8sEmptyDir = "kubernetes.io~empty-dir" +) + +// IsEphemeralStorage returns true if the given path +// to the storage belongs to kubernetes ephemeral storage +// +// This method depends on a specific path used by k8s +// to detect if it's of type ephemeral. As of now, +// this is a very k8s specific solution that works +// but in future there should be a better way for this +// method to determine if the path is for ephemeral +// volume type +func IsEphemeralStorage(path string) bool { + if !isEmptyDir(path) { + return false + } + + if _, fsType, _ := GetDevicePathAndFsType(path); fsType == "tmpfs" { + return true + } + + return false +} + +// Isk8sHostEmptyDir returns true if the given path +// to the storage belongs to kubernetes empty-dir of medium "default" +// i.e volumes that are directories on the host. +func Isk8sHostEmptyDir(path string) bool { + if !isEmptyDir(path) { + return false + } + + if _, fsType, _ := GetDevicePathAndFsType(path); fsType != "tmpfs" { + return true + } + return false +} + +func isEmptyDir(path string) bool { + splitSourceSlice := strings.Split(path, "/") + if len(splitSourceSlice) > 1 { + storageType := splitSourceSlice[len(splitSourceSlice)-2] + if storageType == K8sEmptyDir { + return true + } + } + return false +} diff --git a/virtcontainers/mount_test.go b/virtcontainers/mount_test.go index f39fa3c489..7d0b3c4768 100644 --- a/virtcontainers/mount_test.go +++ b/virtcontainers/mount_test.go @@ -9,6 +9,8 @@ import ( "bytes" "context" "fmt" + "github.com/stretchr/testify/assert" + "io/ioutil" "os" "os/exec" "path/filepath" @@ -18,6 +20,11 @@ import ( "testing" ) +const ( + testDisabledNeedRoot = "Test disabled as requires root user" + testDirMode = os.FileMode(0750) +) + func TestIsSystemMount(t *testing.T) { tests := []struct { mnt string @@ -289,6 +296,39 @@ func TestIsDockerVolume(t *testing.T) { assert.True(t, isDockerVolume) path = "/var/lib/testdir" - isDockerVolume := IsDockerVolume(path) + isDockerVolume = IsDockerVolume(path) assert.False(t, isDockerVolume) } + +func TestIsEphemeralStorage(t *testing.T) { + if os.Geteuid() != 0 { + t.Skip(testDisabledNeedRoot) + } + + dir, err := ioutil.TempDir(testDir, "foo") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + sampleEphePath := filepath.Join(dir, k8sEmptyDir, "tmp-volume") + err = os.MkdirAll(sampleEphePath, testDirMode) + assert.Nil(t, err) + + err = syscall.Mount("tmpfs", sampleEphePath, "tmpfs", 0, "") + assert.Nil(t, err) + defer syscall.Unmount(sampleEphePath, 0) + + isEphe := IsEphemeralStorage(sampleEphePath) + assert.True(t, isEphe) + + isHostEmptyDir := Isk8sHostEmptyDir(sampleEphePath) + assert.False(t, isHostEmptyDir) + + sampleEphePath = "/var/lib/kubelet/pods/366c3a75-4869-11e8-b479-507b9ddd5ce4/volumes/cache-volume" + isEphe = IsEphemeralStorage(sampleEphePath) + assert.False(t, isEphe) + + isHostEmptyDir = Isk8sHostEmptyDir(sampleEphePath) + assert.False(t, isHostEmptyDir) +} From 76c4639adac07908bee6a8a42d65a2c29f040e8a Mon Sep 17 00:00:00 2001 From: Alex Price Date: Fri, 5 Apr 2019 11:23:10 +1100 Subject: [PATCH 3/3] storage: create k8s emptyDir inside VM This introduces a new storage type: local. Local storage type will tell the kata-agent to create an empty directory in the sandbox directory within the VM. K8s host emptyDirs will then use the local storage type and mount it inside each container. By doing this, we utilise the storage medium that the sandbox uses. In most cases this will be 9p. If the VM is using device mapper for container storage, the containers will benefit from the better performance of device mapper for host emptyDir. Fixes #1472 Signed-off-by: Alex Price --- pkg/katautils/create.go | 5 ++- virtcontainers/kata_agent.go | 68 ++++++++++++++++++++++++------- virtcontainers/kata_agent_test.go | 2 +- virtcontainers/mount_test.go | 2 +- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/pkg/katautils/create.go b/pkg/katautils/create.go index 630b32c991..de41c7ff49 100644 --- a/pkg/katautils/create.go +++ b/pkg/katautils/create.go @@ -162,7 +162,10 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon func SetEphemeralStorageType(ociSpec oci.CompatOCISpec) oci.CompatOCISpec { for idx, mnt := range ociSpec.Mounts { if vc.IsEphemeralStorage(mnt.Source) { - ociSpec.Mounts[idx].Type = "ephemeral" + ociSpec.Mounts[idx].Type = vc.KataEphemeralDevType + } + if vc.Isk8sHostEmptyDir(mnt.Source) { + ociSpec.Mounts[idx].Type = vc.KataLocalDevType } } return ociSpec diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 84352d84b7..91f2ed2d14 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -43,6 +43,15 @@ import ( grpcStatus "google.golang.org/grpc/status" ) +const ( + // KataEphemeralDevType creates a tmpfs backed volume for sharing files between containers. + KataEphemeralDevType = "ephemeral" + + // KataLocalDevType creates a local directory inside the VM for sharing files between + // containers. + KataLocalDevType = "local" +) + var ( checkRequestTimeout = 30 * time.Second defaultKataSocketName = "kata.sock" @@ -59,17 +68,17 @@ var ( vsockSocketScheme = "vsock" // port numbers below 1024 are called privileged ports. Only a process with // CAP_NET_BIND_SERVICE capability may bind to these port numbers. - vSockPort = 1024 - kata9pDevType = "9p" - kataMmioBlkDevType = "mmioblk" - kataBlkDevType = "blk" - kataSCSIDevType = "scsi" - kataNvdimmDevType = "nvdimm" - sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} - shmDir = "shm" - kataEphemeralDevType = "ephemeral" - ephemeralPath = filepath.Join(kataGuestSandboxDir, kataEphemeralDevType) - grpcMaxDataSize = int64(1024 * 1024) + vSockPort = 1024 + kata9pDevType = "9p" + kataMmioBlkDevType = "mmioblk" + kataBlkDevType = "blk" + kataSCSIDevType = "scsi" + kataNvdimmDevType = "nvdimm" + sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} + shmDir = "shm" + ephemeralPath = filepath.Join(kataGuestSandboxDir, KataEphemeralDevType) + grpcMaxDataSize = int64(1024 * 1024) + localDirOptions = []string{"mode=0777"} ) // KataAgentConfig is a structure storing information needed @@ -672,7 +681,7 @@ func (k *kataAgent) startSandbox(sandbox *Sandbox) error { shmSizeOption := fmt.Sprintf("size=%d", sandbox.shmSize) shmStorage := &grpc.Storage{ - Driver: kataEphemeralDevType, + Driver: KataEphemeralDevType, MountPoint: path, Source: "shm", Fstype: "tmpfs", @@ -1038,6 +1047,9 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process, epheStorages := k.handleEphemeralStorage(ociSpec.Mounts) ctrStorages = append(ctrStorages, epheStorages...) + localStorages := k.handleLocalStorage(ociSpec.Mounts, sandbox.id) + ctrStorages = append(ctrStorages, localStorages...) + // We replace all OCI mount sources that match our container mount // with the right source path (The guest one). if err = k.replaceOCIMountSource(ociSpec, newMounts); err != nil { @@ -1116,14 +1128,14 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process, func (k *kataAgent) handleEphemeralStorage(mounts []specs.Mount) []*grpc.Storage { var epheStorages []*grpc.Storage for idx, mnt := range mounts { - if mnt.Type == kataEphemeralDevType { + if mnt.Type == KataEphemeralDevType { // Set the mount source path to a path that resides inside the VM mounts[idx].Source = filepath.Join(ephemeralPath, filepath.Base(mnt.Source)) // Create a storage struct so that kata agent is able to create // tmpfs backed volume inside the VM epheStorage := &grpc.Storage{ - Driver: kataEphemeralDevType, + Driver: KataEphemeralDevType, Source: "tmpfs", Fstype: "tmpfs", MountPoint: mounts[idx].Source, @@ -1134,6 +1146,34 @@ func (k *kataAgent) handleEphemeralStorage(mounts []specs.Mount) []*grpc.Storage return epheStorages } +// handleLocalStorage handles local storage within the VM +// by creating a directory in the VM from the source of the mount point. +func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string) []*grpc.Storage { + var localStorages []*grpc.Storage + for idx, mnt := range mounts { + if mnt.Type == KataLocalDevType { + // Set the mount source path to a the desired directory point in the VM. + // In this case it is located in the sandbox directory. + // We rely on the fact that the first container in the VM has the same ID as the sandbox ID. + // In Kubernetes, this is usually the pause container and we depend on it existing for + // local directories to work. + mounts[idx].Source = filepath.Join(kataGuestSharedDir, sandboxID, KataLocalDevType, filepath.Base(mnt.Source)) + + // Create a storage struct so that the kata agent is able to create the + // directory inside the VM. + localStorage := &grpc.Storage{ + Driver: KataLocalDevType, + Source: KataLocalDevType, + Fstype: KataLocalDevType, + MountPoint: mounts[idx].Source, + Options: localDirOptions, + } + localStorages = append(localStorages, localStorage) + } + } + return localStorages +} + // handleBlockVolumes handles volumes that are block devices files // by passing the block devices as Storage to the agent. func (k *kataAgent) handleBlockVolumes(c *Container) []*grpc.Storage { diff --git a/virtcontainers/kata_agent_test.go b/virtcontainers/kata_agent_test.go index bdc869a0e3..ef3a1ad4e2 100644 --- a/virtcontainers/kata_agent_test.go +++ b/virtcontainers/kata_agent_test.go @@ -365,7 +365,7 @@ func TestHandleEphemeralStorage(t *testing.T) { mountSource := "/tmp/mountPoint" mount := specs.Mount{ - Type: kataEphemeralDevType, + Type: KataEphemeralDevType, Source: mountSource, } diff --git a/virtcontainers/mount_test.go b/virtcontainers/mount_test.go index 7d0b3c4768..2edc87e4a7 100644 --- a/virtcontainers/mount_test.go +++ b/virtcontainers/mount_test.go @@ -311,7 +311,7 @@ func TestIsEphemeralStorage(t *testing.T) { } defer os.RemoveAll(dir) - sampleEphePath := filepath.Join(dir, k8sEmptyDir, "tmp-volume") + sampleEphePath := filepath.Join(dir, K8sEmptyDir, "tmp-volume") err = os.MkdirAll(sampleEphePath, testDirMode) assert.Nil(t, err)