diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 68a48bdf26..02619ff532 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -18,6 +18,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/manager" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/types" @@ -511,8 +512,7 @@ func (c *Container) shareFiles(ctx context.Context, m Mount, idx int) (string, b // It also updates the container mount list with the HostPath info, and store // container mounts to the storage. This way, we will have the HostPath info // available when we will need to unmount those mounts. -func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, ignoredMounts map[string]Mount) (err error) { - +func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, ignoredMounts map[string]Mount) (storages []*grpc.Storage, err error) { var devicesToDetach []string defer func() { if err != nil { @@ -534,7 +534,7 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i if len(m.BlockDeviceID) > 0 { // Attach this block device, all other devices passed in the config have been attached at this point if err = c.sandbox.devManager.AttachDevice(ctx, m.BlockDeviceID, c.sandbox); err != nil { - return err + return storages, err } devicesToDetach = append(devicesToDetach, m.BlockDeviceID) continue @@ -563,7 +563,7 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i var guestDest string guestDest, ignore, err = c.shareFiles(ctx, m, idx) if err != nil { - return err + return storages, err } // Expand the list of mounts to ignore. @@ -571,7 +571,6 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i ignoredMounts[m.Source] = Mount{Source: m.Source} continue } - sharedDirMount := Mount{ Source: guestDest, Destination: m.Destination, @@ -580,10 +579,46 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i ReadOnly: m.ReadOnly, } + // virtiofs does not support inotify. To workaround this limitation, we want to special case + // mounts that are commonly 'watched'. "watchable" mounts include: + // - Kubernetes configmap + // - Kubernetes secret + // If we identify one of these, we'll need to carry out polling in the guest in order to present the + // container with a mount that supports inotify. To do this, we create a Storage object for + // the "watchable-bind" driver. This will have the agent create a new mount that is watchable, + // who's effective source is the original mount (the agent will poll the original mount for changes and + // manually update the path that is mounted into the container). + // Based on this, let's make sure we update the sharedDirMount structure with the new watchable-mount as + // the source (this is what is utilized to update the OCI spec). + caps := c.sandbox.hypervisor.capabilities(ctx) + if isWatchableMount(m.Source) && caps.IsFsSharingSupported() { + + // Create path in shared directory for creating watchable mount: + watchableHostPath := filepath.Join(getMountPath(c.sandboxID), "watchable") + if err := os.MkdirAll(watchableHostPath, DirMode); err != nil { + return storages, fmt.Errorf("unable to create watchable path: %s: %v", watchableHostPath, err) + } + + watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(guestDest)) + + storage := &grpc.Storage{ + Driver: kataWatchableBindDevType, + Source: guestDest, + Fstype: "bind", + MountPoint: watchableGuestMount, + Options: m.Options, + } + storages = append(storages, storage) + + // Update the sharedDirMount, in order to identify what will + // change in the OCI spec. + sharedDirMount.Source = watchableGuestMount + } + sharedDirMounts[sharedDirMount.Destination] = sharedDirMount } - return nil + return storages, nil } func (c *Container) unmountHostMounts(ctx context.Context) error { diff --git a/src/runtime/virtcontainers/container_test.go b/src/runtime/virtcontainers/container_test.go index b839c0d911..2ef2b0b95d 100644 --- a/src/runtime/virtcontainers/container_test.go +++ b/src/runtime/virtcontainers/container_test.go @@ -548,3 +548,91 @@ func TestProcessIOStream(t *testing.T) { _, _, _, err = c.ioStream(processID) assert.Error(err) } + +func TestMountSharedDirMounts(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + + assert := assert.New(t) + + testMountPath, err := ioutil.TempDir("", "sandbox-test") + assert.NoError(err) + defer os.RemoveAll(testMountPath) + + // create a new shared directory for our test: + kataHostSharedDirSaved := kataHostSharedDir + testHostDir, err := ioutil.TempDir("", "kata-cleanup") + assert.NoError(err) + kataHostSharedDir = func() string { + return testHostDir + } + defer func() { + kataHostSharedDir = kataHostSharedDirSaved + }() + + // Create a kata agent + k := &kataAgent{ctx: context.Background()} + + // Create sandbox + sandbox := &Sandbox{ + ctx: context.Background(), + id: "foobar", + hypervisor: &mockHypervisor{}, + config: &SandboxConfig{ + HypervisorConfig: HypervisorConfig{ + BlockDeviceDriver: config.VirtioBlock, + }, + }, + } + // setup the shared mounts: + k.setupSharedPath(k.ctx, sandbox) + + // + // Create the mounts that we'll test with + // + secretpath := filepath.Join(testMountPath, K8sSecret) + err = os.MkdirAll(secretpath, 0777) + assert.NoError(err) + secret := filepath.Join(secretpath, "super-secret-thing") + _, err = os.Create(secret) + assert.NoError(err) + + mountDestination := "/fluffhead/token" + // + // Create container to utilize this mount/secret + // + container := Container{ + sandbox: sandbox, + sandboxID: "foobar", + id: "test-ctr", + mounts: []Mount{ + { + Source: secret, + Destination: mountDestination, + Type: "bind", + }, + }, + } + + updatedMounts := make(map[string]Mount) + ignoredMounts := make(map[string]Mount) + storage, err := container.mountSharedDirMounts(k.ctx, updatedMounts, ignoredMounts) + assert.NoError(err) + + // Look at the resulting hostpath that was created. Since a unique ID is applied, we'll use this + // to verify the updated mounts and storage object + hostMountName := filepath.Base(container.mounts[0].HostPath) + expectedStorageSource := filepath.Join(defaultKataGuestSharedDir, hostMountName) + expectedStorageDest := filepath.Join(defaultKataGuestSharedDir, "watchable", hostMountName) + + // We expect a single new storage object who's source is the original mount's base path and desitation is same with -watchable appended: + assert.Equal(len(storage), 1) + assert.Equal(expectedStorageSource, storage[0].Source) + assert.Equal(expectedStorageDest, storage[0].MountPoint) + + // We expect a single updated mount, who's source is the watchable mount path, and destination remains unchanged: + assert.Equal(len(updatedMounts), 1) + assert.Equal(updatedMounts[mountDestination].Source, expectedStorageDest) + assert.Equal(updatedMounts[mountDestination].Destination, mountDestination) +} diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index fcf671186f..9f3aae1c00 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -83,6 +83,7 @@ var ( kataSCSIDevType = "scsi" kataNvdimmDevType = "nvdimm" kataVirtioFSDevType = "virtio-fs" + kataWatchableBindDevType = "watchable-bind" sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} sharedDirVirtioFSOptions = []string{} sharedDirVirtioFSDaxOptions = "dax" @@ -1343,10 +1344,11 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co sharedDirMounts := make(map[string]Mount) ignoredMounts := make(map[string]Mount) - err = c.mountSharedDirMounts(ctx, sharedDirMounts, ignoredMounts) + shareStorages, err := c.mountSharedDirMounts(ctx, sharedDirMounts, ignoredMounts) if err != nil { return nil, err } + ctrStorages = append(ctrStorages, shareStorages...) k.handleShm(ociSpec.Mounts, sandbox) diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index 18487aabd8..e9e6f5b03f 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -21,7 +21,9 @@ type mockHypervisor struct { } func (m *mockHypervisor) capabilities(ctx context.Context) types.Capabilities { - return types.Capabilities{} + caps := types.Capabilities{} + caps.SetFsSharingSupport() + return caps } func (m *mockHypervisor) hypervisorConfig() HypervisorConfig { diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 05de157544..322c53214d 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -9,6 +9,7 @@ import ( "context" "errors" "fmt" + "io/ioutil" "os" "path/filepath" "strings" @@ -408,7 +409,9 @@ func IsDockerVolume(path string) bool { const ( // K8sEmptyDir is the k8s specific path for `empty-dir` volumes - K8sEmptyDir = "kubernetes.io~empty-dir" + K8sEmptyDir = "kubernetes.io~empty-dir" + K8sConfigMap = "kubernetes.io~configmap" + K8sSecret = "kubernetes.io~secret" ) // IsEphemeralStorage returns true if the given path @@ -446,13 +449,78 @@ func Isk8sHostEmptyDir(path string) bool { return false } -func isEmptyDir(path string) bool { +func checkKubernetesVolume(path, volumeType string) bool { splitSourceSlice := strings.Split(path, "/") if len(splitSourceSlice) > 1 { storageType := splitSourceSlice[len(splitSourceSlice)-2] - if storageType == K8sEmptyDir { + if storageType == volumeType { return true } } + + return false +} + +func isEmptyDir(path string) bool { + return checkKubernetesVolume(path, K8sEmptyDir) +} + +func isConfigMap(path string) bool { + return checkKubernetesVolume(path, K8sConfigMap) +} + +func isSecret(path string) bool { + return checkKubernetesVolume(path, K8sSecret) +} + +// countFiles will return the number of files within a given path. If the total number of +// files observed is greater than limit, break and return -1 +func countFiles(path string, limit int) (numFiles int, err error) { + + // First, check to see if the path exists + file, err := os.Stat(path) + if os.IsNotExist(err) { + return 0, err + } + + // Special case if this is just a file, not a directory: + if !file.IsDir() { + return 1, nil + } + + files, err := ioutil.ReadDir(path) + if err != nil { + return 0, err + } + + for _, file := range files { + if file.IsDir() { + inc, err := countFiles(filepath.Join(path, file.Name()), (limit - numFiles)) + if err != nil { + return numFiles, err + } + numFiles = numFiles + inc + } else { + numFiles++ + } + if numFiles > limit { + return -1, nil + } + } + return numFiles, nil +} + +func isWatchableMount(path string) bool { + if isSecret(path) || isConfigMap(path) { + // we have a cap on number of FDs which can be present in mount + // to determine if watchable. A similar check exists within the agent, + // which may or may not help handle case where extra files are added to + // a mount after the fact + count, _ := countFiles(path, 8) + if count > 0 { + return true + } + } + return false } diff --git a/src/runtime/virtcontainers/mount_test.go b/src/runtime/virtcontainers/mount_test.go index 678d584d46..481d58fd18 100644 --- a/src/runtime/virtcontainers/mount_test.go +++ b/src/runtime/virtcontainers/mount_test.go @@ -267,6 +267,98 @@ func TestIsEphemeralStorage(t *testing.T) { assert.False(isHostEmptyDir) } +func TestIsEmtpyDir(t *testing.T) { + assert := assert.New(t) + path := "/var/lib/kubelet/pods/5f0861a0-a987-4a3a-bb0f-1058ddb9678f/volumes/kubernetes.io~empty-dir/foobar" + result := isEmptyDir(path) + assert.True(result) + + // expect the empty-dir to be second to last in path + result = isEmptyDir(filepath.Join(path, "bazzzzz")) + assert.False(result) +} + +func TestIsConfigMap(t *testing.T) { + assert := assert.New(t) + path := "/var/lib/kubelet/pods/5f0861a0-a987-4a3a-bb0f-1058ddb9678f/volumes/kubernetes.io~configmap/config" + result := isConfigMap(path) + assert.True(result) + + // expect the empty-dir to be second to last in path + result = isConfigMap(filepath.Join(path, "bazzzzz")) + assert.False(result) + +} +func TestIsSecret(t *testing.T) { + assert := assert.New(t) + path := "/var/lib/kubelet/pods/5f0861a0-a987-4a3a-bb0f-1058ddb9678f/volumes/kubernetes.io~secret" + result := isSecret(path) + assert.False(result) + + // expect the empty-dir to be second to last in path + result = isSecret(filepath.Join(path, "sweet-token")) + assert.True(result) + + result = isConfigMap(filepath.Join(path, "sweet-token-dir", "whoops")) + assert.False(result) +} + +func TestIsWatchable(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + + assert := assert.New(t) + + path := "" + result := isWatchableMount(path) + assert.False(result) + + // path does not exist, failure expected: + path = "/var/lib/kubelet/pods/5f0861a0-a987-4a3a-bb0f-1058ddb9678f/volumes/kubernetes.io~empty-dir/foobar" + result = isWatchableMount(path) + assert.False(result) + + testPath, err := ioutil.TempDir("", "") + assert.NoError(err) + defer os.RemoveAll(testPath) + + // Verify secret is successful (single file mount): + // /tmppath/kubernetes.io~secret/super-secret-thing + secretpath := filepath.Join(testPath, K8sSecret) + err = os.MkdirAll(secretpath, 0777) + assert.NoError(err) + secret := filepath.Join(secretpath, "super-secret-thing") + _, err = os.Create(secret) + assert.NoError(err) + result = isWatchableMount(secret) + assert.True(result) + + // Verify that if we have too many files, it will no longer be watchable: + // /tmp/kubernetes.io~configmap/amazing-dir-of-configs/ + // | - c0 + // | - c1 + // ... + // | - c7 + // should be okay. + // + // 9 files should cause the mount to be deemed "not watchable" + configs := filepath.Join(testPath, K8sConfigMap, "amazing-dir-of-configs") + err = os.MkdirAll(configs, 0777) + assert.NoError(err) + + for i := 0; i < 8; i++ { + _, err := os.Create(filepath.Join(configs, fmt.Sprintf("c%v", i))) + assert.NoError(err) + result = isWatchableMount(configs) + assert.True(result) + } + _, err = os.Create(filepath.Join(configs, "toomuch")) + assert.NoError(err) + result = isWatchableMount(configs) + assert.False(result) +} + func TestBindMountInvalidSourceSymlink(t *testing.T) { source := filepath.Join(testDir, "fooFile") os.Remove(source) diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index ae89b04b2b..e91650fdee 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -1299,7 +1299,7 @@ func TestPreAddDevice(t *testing.T) { mounts := make(map[string]Mount) ignoreMounts := make(map[string]Mount) - err = container.mountSharedDirMounts(context.Background(), mounts, ignoreMounts) + _, err = container.mountSharedDirMounts(context.Background(), mounts, ignoreMounts) assert.Nil(t, err) assert.Equal(t, len(mounts), 0,