From 9f3184c5a42cc107ac9a5dce5a0a3eaacda60814 Mon Sep 17 00:00:00 2001 From: Yu-Ju Hong Date: Tue, 7 Mar 2017 16:49:25 -0800 Subject: [PATCH] Remove DockerManager from kubelet This commit deletes code in dockertools that is only used by DockerManager. A follow-up change will rename and clean up the rest of the files in this package. The commit also sets EnableCRI to true if the container runtime is not rkt. A follow-up change will remove the flag/field and all references to it. --- pkg/kubelet/BUILD | 1 - pkg/kubelet/dockertools/BUILD | 70 - pkg/kubelet/dockertools/container_gc.go | 340 --- pkg/kubelet/dockertools/container_gc_test.go | 318 -- pkg/kubelet/dockertools/convert.go | 85 - pkg/kubelet/dockertools/convert_test.go | 90 - pkg/kubelet/dockertools/docker.go | 2 + pkg/kubelet/dockertools/docker_manager.go | 2637 +---------------- .../dockertools/docker_manager_linux.go | 57 - .../dockertools/docker_manager_linux_test.go | 466 --- .../dockertools/docker_manager_test.go | 1908 ------------ .../dockertools/docker_manager_unsupported.go | 33 - .../dockertools/docker_manager_windows.go | 57 - pkg/kubelet/dockertools/docker_test.go | 276 -- pkg/kubelet/dockertools/fake_manager.go | 78 - pkg/kubelet/dockertools/images.go | 102 - pkg/kubelet/dockertools/images_test.go | 334 --- pkg/kubelet/dockertools/labels.go | 249 -- pkg/kubelet/dockertools/labels_test.go | 142 - pkg/kubelet/kubelet.go | 107 +- 20 files changed, 145 insertions(+), 7207 deletions(-) delete mode 100644 pkg/kubelet/dockertools/container_gc.go delete mode 100644 pkg/kubelet/dockertools/container_gc_test.go delete mode 100644 pkg/kubelet/dockertools/convert.go delete mode 100644 pkg/kubelet/dockertools/convert_test.go delete mode 100644 pkg/kubelet/dockertools/docker_manager_linux_test.go delete mode 100644 pkg/kubelet/dockertools/docker_manager_test.go delete mode 100644 pkg/kubelet/dockertools/fake_manager.go delete mode 100644 pkg/kubelet/dockertools/images.go delete mode 100644 pkg/kubelet/dockertools/images_test.go delete mode 100644 pkg/kubelet/dockertools/labels.go delete mode 100644 pkg/kubelet/dockertools/labels_test.go diff --git a/pkg/kubelet/BUILD b/pkg/kubelet/BUILD index e44eee88d53..a2a24449ae8 100644 --- a/pkg/kubelet/BUILD +++ b/pkg/kubelet/BUILD @@ -101,7 +101,6 @@ go_library( "//pkg/util/mount:go_default_library", "//pkg/util/node:go_default_library", "//pkg/util/oom:go_default_library", - "//pkg/util/procfs:go_default_library", "//pkg/util/removeall:go_default_library", "//pkg/version:go_default_library", "//pkg/volume:go_default_library", diff --git a/pkg/kubelet/dockertools/BUILD b/pkg/kubelet/dockertools/BUILD index dd12a73d048..20d0bcbb43c 100644 --- a/pkg/kubelet/dockertools/BUILD +++ b/pkg/kubelet/dockertools/BUILD @@ -11,53 +11,26 @@ load( go_library( name = "go_default_library", srcs = [ - "container_gc.go", - "convert.go", "docker.go", "docker_manager.go", "docker_manager_linux.go", "exec.go", "fake_docker_client.go", - "fake_manager.go", - "images.go", "instrumented_docker.go", "kube_docker_client.go", - "labels.go", ], tags = ["automanaged"], deps = [ - "//pkg/api:go_default_library", "//pkg/api/v1:go_default_library", - "//pkg/api/v1/helper:go_default_library", "//pkg/client/unversioned/remotecommand:go_default_library", "//pkg/credentialprovider:go_default_library", - "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", - "//pkg/kubelet/custommetrics:go_default_library", - "//pkg/kubelet/dockertools/securitycontext:go_default_library", - 
"//pkg/kubelet/events:go_default_library", "//pkg/kubelet/images:go_default_library", "//pkg/kubelet/leaky:go_default_library", - "//pkg/kubelet/lifecycle:go_default_library", "//pkg/kubelet/metrics:go_default_library", - "//pkg/kubelet/network:go_default_library", - "//pkg/kubelet/network/hairpin:go_default_library", - "//pkg/kubelet/prober/results:go_default_library", - "//pkg/kubelet/qos:go_default_library", - "//pkg/kubelet/types:go_default_library", - "//pkg/kubelet/util/cache:go_default_library", - "//pkg/kubelet/util/format:go_default_library", "//pkg/security/apparmor:go_default_library", - "//pkg/securitycontext:go_default_library", "//pkg/util/exec:go_default_library", - "//pkg/util/oom:go_default_library", - "//pkg/util/procfs:go_default_library", - "//pkg/util/selinux:go_default_library", - "//pkg/util/strings:go_default_library", - "//pkg/util/tail:go_default_library", "//pkg/util/term:go_default_library", - "//pkg/util/version:go_default_library", - "//vendor/github.com/armon/circbuf:go_default_library", "//vendor/github.com/docker/distribution/digest:go_default_library", "//vendor/github.com/docker/distribution/reference:go_default_library", "//vendor/github.com/docker/docker/pkg/jsonmessage:go_default_library", @@ -65,36 +38,20 @@ go_library( "//vendor/github.com/docker/engine-api/client:go_default_library", "//vendor/github.com/docker/engine-api/types:go_default_library", "//vendor/github.com/docker/engine-api/types/container:go_default_library", - "//vendor/github.com/docker/engine-api/types/strslice:go_default_library", - "//vendor/github.com/docker/engine-api/types/versions:go_default_library", - "//vendor/github.com/docker/go-connections/nat:go_default_library", "//vendor/github.com/golang/glog:go_default_library", - "//vendor/github.com/google/cadvisor/info/v1:go_default_library", "//vendor/golang.org/x/net/context:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", - "//vendor/k8s.io/client-go/tools/record:go_default_library", "//vendor/k8s.io/client-go/util/clock:go_default_library", - "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library", ], ) go_test( name = "go_default_test", srcs = [ - "container_gc_test.go", - "convert_test.go", - "docker_manager_linux_test.go", - "docker_manager_test.go", "docker_test.go", - "images_test.go", "kube_docker_client_test.go", - "labels_test.go", ], data = [ "fixtures/seccomp/sub/subtest", @@ -105,41 +62,14 @@ go_test( "automanaged", ], deps = [ - "//pkg/api/testapi:go_default_library", "//pkg/api/v1:go_default_library", - "//pkg/apis/componentconfig:go_default_library", "//pkg/credentialprovider:go_default_library", - "//pkg/kubelet/container:go_default_library", - "//pkg/kubelet/container/testing:go_default_library", - "//pkg/kubelet/events:go_default_library", "//pkg/kubelet/images:go_default_library", - "//pkg/kubelet/network:go_default_library", - "//pkg/kubelet/network/testing:go_default_library", - "//pkg/kubelet/prober/results:go_default_library", - "//pkg/kubelet/types:go_default_library", - "//pkg/kubelet/util/format:go_default_library", - 
"//pkg/security/apparmor:go_default_library", - "//pkg/util/exec:go_default_library", "//pkg/util/hash:go_default_library", - "//pkg/util/strings:go_default_library", "//vendor/github.com/docker/docker/pkg/jsonmessage:go_default_library", "//vendor/github.com/docker/engine-api/types:go_default_library", - "//vendor/github.com/docker/engine-api/types/container:go_default_library", - "//vendor/github.com/docker/engine-api/types/strslice:go_default_library", - "//vendor/github.com/docker/go-connections/nat:go_default_library", - "//vendor/github.com/golang/mock/gomock:go_default_library", - "//vendor/github.com/google/cadvisor/info/v1:go_default_library", "//vendor/github.com/stretchr/testify/assert:go_default_library", - "//vendor/github.com/stretchr/testify/require:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", - "//vendor/k8s.io/client-go/tools/record:go_default_library", - "//vendor/k8s.io/client-go/util/clock:go_default_library", - "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library", ], ) diff --git a/pkg/kubelet/dockertools/container_gc.go b/pkg/kubelet/dockertools/container_gc.go deleted file mode 100644 index 8d902a7be4b..00000000000 --- a/pkg/kubelet/dockertools/container_gc.go +++ /dev/null @@ -1,340 +0,0 @@ -/* -Copyright 2014 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "fmt" - "os" - "path" - "path/filepath" - "sort" - "time" - - dockertypes "github.com/docker/engine-api/types" - "github.com/golang/glog" - "k8s.io/apimachinery/pkg/types" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - knetwork "k8s.io/kubernetes/pkg/kubelet/network" -) - -type containerGC struct { - client DockerInterface - podGetter podGetter - network *knetwork.PluginManager - containerLogsDir string -} - -func NewContainerGC(client DockerInterface, podGetter podGetter, network *knetwork.PluginManager, containerLogsDir string) *containerGC { - return &containerGC{ - client: client, - podGetter: podGetter, - network: network, - containerLogsDir: containerLogsDir, - } -} - -// Internal information kept for containers being considered for GC. -type containerGCInfo struct { - // Docker ID of the container. - id string - - // Docker name of the container. - dockerName string - - // Creation time for the container. - createTime time.Time - - // Full pod name, including namespace in the format `namespace_podName`. - // This comes from dockertools.ParseDockerName(...) 
- podNameWithNamespace string - - // Kubernetes pod UID - podUID types.UID - - // Container name in pod - containerName string - - // Container network mode - isHostNetwork bool -} - -// Containers are considered for eviction as units of (UID, container name) pair. -type evictUnit struct { - // UID of the pod. - uid types.UID - - // Name of the container in the pod. - name string -} - -type containersByEvictUnit map[evictUnit][]containerGCInfo - -// Returns the number of containers in this map. -func (cu containersByEvictUnit) NumContainers() int { - num := 0 - for key := range cu { - num += len(cu[key]) - } - - return num -} - -// Returns the number of pod in this map. -func (cu containersByEvictUnit) NumEvictUnits() int { - return len(cu) -} - -// Newest first. -type byCreated []containerGCInfo - -func (a byCreated) Len() int { return len(a) } -func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } - -func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) { - for uid := range evictUnits { - toRemove := len(evictUnits[uid]) - MaxContainers - - if toRemove > 0 { - evictUnits[uid] = cgc.removeOldestN(evictUnits[uid], toRemove) - } - } -} - -// Removes the oldest toRemove containers and returns the resulting slice. -func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo { - // Remove from oldest to newest (last to first). - numToKeep := len(containers) - toRemove - for i := numToKeep; i < len(containers); i++ { - cgc.removeContainer(containers[i]) - } - - // Assume we removed the containers so that we're not too aggressive. - return containers[:numToKeep] -} - -// Returns a full GC info structure on success, or a partial one on failure -func newContainerGCInfo(id string, inspectResult *dockertypes.ContainerJSON, created time.Time) (containerGCInfo, error) { - containerName, _, err := ParseDockerName(inspectResult.Name) - if err != nil { - return containerGCInfo{ - id: id, - dockerName: inspectResult.Name, - }, fmt.Errorf("failed to parse docker name %q: %v", inspectResult.Name, err) - } - - networkMode := getDockerNetworkMode(inspectResult) - return containerGCInfo{ - id: id, - dockerName: inspectResult.Name, - podNameWithNamespace: containerName.PodFullName, - podUID: containerName.PodUID, - containerName: containerName.ContainerName, - createTime: created, - isHostNetwork: networkMode == namespaceModeHost, - }, nil -} - -// Get all containers that are evictable. Evictable containers are: not running -// and created more than MinAge ago. -func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, []containerGCInfo, []containerGCInfo, error) { - containers, err := GetKubeletDockerContainers(cgc.client, true) - if err != nil { - return containersByEvictUnit{}, []containerGCInfo{}, []containerGCInfo{}, err - } - - unidentifiedContainers := make([]containerGCInfo, 0) - netContainers := make([]containerGCInfo, 0) - evictUnits := make(containersByEvictUnit) - newestGCTime := time.Now().Add(-minAge) - for _, container := range containers { - // Prune out running containers. - data, err := cgc.client.InspectContainer(container.ID) - if err != nil { - // Container may have been removed already, skip. 
- continue - } else if data.State.Running { - continue - } - - created, err := ParseDockerTimestamp(data.Created) - if err != nil { - glog.Errorf("Failed to parse Created timestamp %q for container %q", data.Created, container.ID) - } - if newestGCTime.Before(created) { - continue - } - - containerInfo, err := newContainerGCInfo(container.ID, data, created) - if err != nil { - unidentifiedContainers = append(unidentifiedContainers, containerInfo) - } else { - // Track net containers for special cleanup - if containerIsNetworked(containerInfo.containerName) { - netContainers = append(netContainers, containerInfo) - } - - key := evictUnit{ - uid: containerInfo.podUID, - name: containerInfo.containerName, - } - evictUnits[key] = append(evictUnits[key], containerInfo) - } - } - - // Sort the containers by age. - for uid := range evictUnits { - sort.Sort(byCreated(evictUnits[uid])) - } - - return evictUnits, netContainers, unidentifiedContainers, nil -} - -// GarbageCollect removes dead containers using the specified container gc policy -func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error { - // Separate containers by evict units. - evictUnits, netContainers, unidentifiedContainers, err := cgc.evictableContainers(gcPolicy.MinAge) - if err != nil { - return err - } - - // Remove unidentified containers. - for _, container := range unidentifiedContainers { - glog.Infof("Removing unidentified dead container %q", container.dockerName) - err = cgc.client.RemoveContainer(container.id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}) - if err != nil { - glog.Warningf("Failed to remove unidentified dead container %q: %v", container.dockerName, err) - } - } - - // Always clean up net containers to ensure network resources are released - // TODO: this may tear down networking again if the container doesn't get - // removed in this GC cycle, but that already happens elsewhere... - for _, container := range netContainers { - glog.Infof("Cleaning up dead net container %q", container.dockerName) - cgc.netContainerCleanup(container) - } - - // Remove deleted pod containers if all sources are ready. - if allSourcesReady { - for key, unit := range evictUnits { - if cgc.isPodDeleted(key.uid) { - cgc.removeOldestN(unit, len(unit)) // Remove all. - delete(evictUnits, key) - } - } - } - - // Enforce max containers per evict unit. - if gcPolicy.MaxPerPodContainer >= 0 { - cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer) - } - - // Enforce max total number of containers. - if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers { - // Leave an equal number of containers per evict unit (min: 1). - numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits() - if numContainersPerEvictUnit < 1 { - numContainersPerEvictUnit = 1 - } - cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit) - - // If we still need to evict, evict oldest first. - numContainers := evictUnits.NumContainers() - if numContainers > gcPolicy.MaxContainers { - flattened := make([]containerGCInfo, 0, numContainers) - for uid := range evictUnits { - flattened = append(flattened, evictUnits[uid]...) 
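// Illustrative sketch, not part of this patch: the arithmetic GarbageCollect uses
// above when the global MaxContainers limit is exceeded. The budget is split
// evenly across evict units, with a floor of one container per unit; any
// remaining excess is then trimmed oldest-first from the flattened list.
// All names below are hypothetical stand-ins.
package main

import "fmt"

func perUnitLimit(maxContainers, numEvictUnits int) int {
	limit := maxContainers / numEvictUnits
	if limit < 1 {
		// Never strip a pod of all retained containers.
		limit = 1
	}
	return limit
}

func main() {
	// 3 containers allowed globally across 5 evict units: integer division gives
	// 0, so the floor of 1 per unit applies and the global pass removes the rest.
	fmt.Println(perUnitLimit(3, 5)) // 1
}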
- } - sort.Sort(byCreated(flattened)) - - cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers) - } - } - - // Remove dead symlinks - should only happen on upgrade - // from a k8s version without proper log symlink cleanup - logSymlinks, _ := filepath.Glob(path.Join(cgc.containerLogsDir, fmt.Sprintf("*.%s", LogSuffix))) - for _, logSymlink := range logSymlinks { - if _, err = os.Stat(logSymlink); os.IsNotExist(err) { - err = os.Remove(logSymlink) - if err != nil { - glog.Warningf("Failed to remove container log dead symlink %q: %v", logSymlink, err) - } - } - } - - return nil -} - -func (cgc *containerGC) netContainerCleanup(containerInfo containerGCInfo) { - if containerInfo.isHostNetwork { - return - } - - podName, podNamespace, err := kubecontainer.ParsePodFullName(containerInfo.podNameWithNamespace) - if err != nil { - glog.Warningf("failed to parse container %q pod full name: %v", containerInfo.dockerName, err) - return - } - - containerID := kubecontainer.DockerID(containerInfo.id).ContainerID() - if err := cgc.network.TearDownPod(podNamespace, podName, containerID); err != nil { - glog.Warningf("failed to tear down container %q network: %v", containerInfo.dockerName, err) - } -} - -func (cgc *containerGC) removeContainer(containerInfo containerGCInfo) { - glog.V(4).Infof("Removing container %q", containerInfo.dockerName) - err := cgc.client.RemoveContainer(containerInfo.id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}) - if err != nil { - glog.Warningf("Failed to remove container %q: %v", containerInfo.dockerName, err) - } - symlinkPath := LogSymlink(cgc.containerLogsDir, containerInfo.podNameWithNamespace, containerInfo.containerName, containerInfo.id) - err = os.Remove(symlinkPath) - if err != nil && !os.IsNotExist(err) { - glog.Warningf("Failed to remove container %q log symlink %q: %v", containerInfo.dockerName, symlinkPath, err) - } -} - -func (cgc *containerGC) deleteContainer(id string) error { - data, err := cgc.client.InspectContainer(id) - if err != nil { - glog.Warningf("Failed to inspect container %q: %v", id, err) - return err - } - if data.State.Running { - return fmt.Errorf("container %q is still running", id) - } - - containerInfo, err := newContainerGCInfo(id, data, time.Now()) - if err != nil { - return err - } - - if containerIsNetworked(containerInfo.containerName) { - cgc.netContainerCleanup(containerInfo) - } - - cgc.removeContainer(containerInfo) - return nil -} - -func (cgc *containerGC) isPodDeleted(podUID types.UID) bool { - _, found := cgc.podGetter.GetPodByUID(podUID) - return !found -} diff --git a/pkg/kubelet/dockertools/container_gc_test.go b/pkg/kubelet/dockertools/container_gc_test.go deleted file mode 100644 index c0eeecb2296..00000000000 --- a/pkg/kubelet/dockertools/container_gc_test.go +++ /dev/null @@ -1,318 +0,0 @@ -/* -Copyright 2014 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package dockertools - -import ( - "fmt" - "reflect" - "sort" - "testing" - "time" - - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - knetwork "k8s.io/kubernetes/pkg/kubelet/network" - nettest "k8s.io/kubernetes/pkg/kubelet/network/testing" -) - -func newTestContainerGC(t *testing.T) (*containerGC, *FakeDockerClient, *nettest.MockNetworkPlugin) { - fakeDocker := NewFakeDockerClient() - fakePodGetter := newFakePodGetter() - fakePlugin := nettest.NewMockNetworkPlugin(gomock.NewController(t)) - fakePlugin.EXPECT().Name().Return("someNetworkPlugin").AnyTimes() - gc := NewContainerGC(fakeDocker, fakePodGetter, knetwork.NewPluginManager(fakePlugin), "") - return gc, fakeDocker, fakePlugin -} - -// Makes a stable time object, lower id is earlier time. -func makeTime(id int) time.Time { - var zero time.Time - return zero.Add(time.Duration(id) * time.Second) -} - -// Makes a container with the specified properties. -func makeContainer(id, uid, name string, running bool, created time.Time) *FakeContainer { - return &FakeContainer{ - Name: fmt.Sprintf("/k8s_%s_bar_new_%s_42", name, uid), - Running: running, - ID: id, - CreatedAt: created, - } -} - -// Makes a container with unidentified name and specified properties. -func makeUndefinedContainer(id string, running bool, created time.Time) *FakeContainer { - return &FakeContainer{ - Name: "/k8s_unidentified", - Running: running, - ID: id, - CreatedAt: created, - } -} - -func addPods(podGetter podGetter, podUIDs ...types.UID) { - fakePodGetter := podGetter.(*fakePodGetter) - for _, uid := range podUIDs { - fakePodGetter.pods[uid] = &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pod" + string(uid), - Namespace: "test", - UID: uid, - }, - } - } -} - -func verifyStringArrayEqualsAnyOrder(t *testing.T, actual, expected []string) { - act := make([]string, len(actual)) - exp := make([]string, len(expected)) - copy(act, actual) - copy(exp, expected) - - sort.StringSlice(act).Sort() - sort.StringSlice(exp).Sort() - - if !reflect.DeepEqual(exp, act) { - t.Errorf("Expected(sorted): %#v, Actual(sorted): %#v", exp, act) - } -} - -func TestDeleteContainerSkipRunningContainer(t *testing.T) { - gc, fakeDocker, _ := newTestContainerGC(t) - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer("1876", "foo", "POD", true, makeTime(0)), - }) - addPods(gc.podGetter, "foo") - - assert.Error(t, gc.deleteContainer("1876")) - assert.Len(t, fakeDocker.Removed, 0) -} - -func TestDeleteContainerRemoveDeadContainer(t *testing.T) { - gc, fakeDocker, fakePlugin := newTestContainerGC(t) - defer fakePlugin.Finish() - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - }) - addPods(gc.podGetter, "foo") - - fakePlugin.EXPECT().TearDownPod(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) - - assert.Nil(t, gc.deleteContainer("1876")) - assert.Len(t, fakeDocker.Removed, 1) -} - -func TestGarbageCollectNetworkTeardown(t *testing.T) { - // Ensure infra container gets teardown called - gc, fakeDocker, fakePlugin := newTestContainerGC(t) - defer fakePlugin.Finish() - id := kubecontainer.DockerID("1867").ContainerID() - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer(id.ID, "foo", "POD", false, makeTime(0)), - }) - addPods(gc.podGetter, "foo") - - fakePlugin.EXPECT().TearDownPod(gomock.Any(), 
gomock.Any(), id).Return(nil) - - assert.Nil(t, gc.deleteContainer(id.ID)) - assert.Len(t, fakeDocker.Removed, 1) - - // Ensure non-infra container does not have teardown called - gc, fakeDocker, fakePlugin = newTestContainerGC(t) - id = kubecontainer.DockerID("1877").ContainerID() - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer(id.ID, "foo", "adsfasdfasdf", false, makeTime(0)), - }) - fakePlugin.EXPECT().SetUpPod(gomock.Any(), gomock.Any(), id).Return(nil) - - addPods(gc.podGetter, "foo") - - assert.Nil(t, gc.deleteContainer(id.ID)) - assert.Len(t, fakeDocker.Removed, 1) -} - -func TestGarbageCollectZeroMaxContainers(t *testing.T) { - gc, fakeDocker, fakePlugin := newTestContainerGC(t) - defer fakePlugin.Finish() - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - }) - addPods(gc.podGetter, "foo") - - fakePlugin.EXPECT().TearDownPod(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) - - assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, true)) - assert.Len(t, fakeDocker.Removed, 1) -} - -func TestGarbageCollectNoMaxPerPodContainerLimit(t *testing.T) { - gc, fakeDocker, fakePlugin := newTestContainerGC(t) - defer fakePlugin.Finish() - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - makeContainer("2876", "foo1", "POD", false, makeTime(1)), - makeContainer("3876", "foo2", "POD", false, makeTime(2)), - makeContainer("4876", "foo3", "POD", false, makeTime(3)), - makeContainer("5876", "foo4", "POD", false, makeTime(4)), - }) - addPods(gc.podGetter, "foo", "foo1", "foo2", "foo3", "foo4") - - fakePlugin.EXPECT().TearDownPod(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(5) - - assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4}, true)) - assert.Len(t, fakeDocker.Removed, 1) -} - -func TestGarbageCollectNoMaxLimit(t *testing.T) { - gc, fakeDocker, fakePlugin := newTestContainerGC(t) - defer fakePlugin.Finish() - fakeDocker.SetFakeContainers([]*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - makeContainer("2876", "foo1", "POD", false, makeTime(0)), - makeContainer("3876", "foo2", "POD", false, makeTime(0)), - makeContainer("4876", "foo3", "POD", false, makeTime(0)), - makeContainer("5876", "foo4", "POD", false, makeTime(0)), - }) - addPods(gc.podGetter, "foo", "foo1", "foo2", "foo3", "foo4") - - fakePlugin.EXPECT().TearDownPod(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(5) - - assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1}, true)) - assert.Len(t, fakeDocker.Removed, 0) -} - -func TestGarbageCollect(t *testing.T) { - tests := []struct { - containers []*FakeContainer - expectedRemoved []string - }{ - // Don't remove containers started recently. - { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, time.Now()), - makeContainer("2876", "foo", "POD", false, time.Now()), - makeContainer("3876", "foo", "POD", false, time.Now()), - }, - }, - // Remove oldest containers. 
- { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - makeContainer("2876", "foo", "POD", false, makeTime(1)), - makeContainer("3876", "foo", "POD", false, makeTime(2)), - }, - expectedRemoved: []string{"1876"}, - }, - // Only remove non-running containers. - { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", true, makeTime(0)), - makeContainer("2876", "foo", "POD", false, makeTime(1)), - makeContainer("3876", "foo", "POD", false, makeTime(2)), - makeContainer("4876", "foo", "POD", false, makeTime(3)), - }, - expectedRemoved: []string{"2876"}, - }, - // Less than maxContainerCount doesn't delete any. - { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - }, - }, - // maxContainerCount applies per (UID,container) pair. - { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - makeContainer("2876", "foo", "POD", false, makeTime(1)), - makeContainer("3876", "foo", "POD", false, makeTime(2)), - makeContainer("1076", "foo", "bar", false, makeTime(0)), - makeContainer("2076", "foo", "bar", false, makeTime(1)), - makeContainer("3076", "foo", "bar", false, makeTime(2)), - makeContainer("1176", "foo2", "POD", false, makeTime(0)), - makeContainer("2176", "foo2", "POD", false, makeTime(1)), - makeContainer("3176", "foo2", "POD", false, makeTime(2)), - }, - expectedRemoved: []string{"1076", "1176", "1876"}, - }, - // Remove non-running unidentified Kubernetes containers. - { - containers: []*FakeContainer{ - makeUndefinedContainer("1876", true, makeTime(0)), - makeUndefinedContainer("2876", false, makeTime(0)), - makeContainer("3876", "foo", "POD", false, makeTime(0)), - }, - expectedRemoved: []string{"2876"}, - }, - // Max limit applied and tries to keep from every pod. - { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(0)), - makeContainer("2876", "foo", "POD", false, makeTime(1)), - makeContainer("3876", "foo1", "POD", false, makeTime(0)), - makeContainer("4876", "foo1", "POD", false, makeTime(1)), - makeContainer("5876", "foo2", "POD", false, makeTime(0)), - makeContainer("6876", "foo2", "POD", false, makeTime(1)), - makeContainer("7876", "foo3", "POD", false, makeTime(0)), - makeContainer("8876", "foo3", "POD", false, makeTime(1)), - makeContainer("9876", "foo4", "POD", false, makeTime(0)), - makeContainer("10876", "foo4", "POD", false, makeTime(1)), - }, - expectedRemoved: []string{"1876", "3876", "5876", "7876", "9876"}, - }, - // If more pods than limit allows, evicts oldest pod. - { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(1)), - makeContainer("2876", "foo", "POD", false, makeTime(2)), - makeContainer("3876", "foo1", "POD", false, makeTime(1)), - makeContainer("4876", "foo1", "POD", false, makeTime(2)), - makeContainer("5876", "foo2", "POD", false, makeTime(0)), - makeContainer("6876", "foo3", "POD", false, makeTime(1)), - makeContainer("7876", "foo4", "POD", false, makeTime(0)), - makeContainer("8876", "foo5", "POD", false, makeTime(1)), - makeContainer("9876", "foo6", "POD", false, makeTime(2)), - makeContainer("10876", "foo7", "POD", false, makeTime(1)), - }, - expectedRemoved: []string{"1876", "3876", "5876", "7876"}, - }, - // Containers for deleted pods should be GC'd. 
- { - containers: []*FakeContainer{ - makeContainer("1876", "foo", "POD", false, makeTime(1)), - makeContainer("2876", "foo", "POD", false, makeTime(2)), - makeContainer("3876", "deleted", "POD", false, makeTime(1)), - makeContainer("4876", "deleted", "POD", false, makeTime(2)), - makeContainer("5876", "deleted", "POD", false, time.Now()), // Deleted pods still respect MinAge. - }, - expectedRemoved: []string{"3876", "4876"}, - }, - } - for i, test := range tests { - t.Logf("Running test case with index %d", i) - gc, fakeDocker, fakePlugin := newTestContainerGC(t) - fakeDocker.SetFakeContainers(test.containers) - addPods(gc.podGetter, "foo", "foo1", "foo2", "foo3", "foo4", "foo5", "foo6", "foo7") - fakePlugin.EXPECT().TearDownPod(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() - assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}, true)) - verifyStringArrayEqualsAnyOrder(t, fakeDocker.Removed, test.expectedRemoved) - fakePlugin.Finish() - } -} diff --git a/pkg/kubelet/dockertools/convert.go b/pkg/kubelet/dockertools/convert.go deleted file mode 100644 index 6a3bf882dc3..00000000000 --- a/pkg/kubelet/dockertools/convert.go +++ /dev/null @@ -1,85 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "fmt" - "strings" - - dockertypes "github.com/docker/engine-api/types" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" -) - -// This file contains helper functions to convert docker API types to runtime -// (kubecontainer) types. -const ( - statusRunningPrefix = "Up" - statusCreatedPrefix = "Created" - statusExitedPrefix = "Exited" -) - -func mapState(state string) kubecontainer.ContainerState { - // Parse the state string in dockertypes.Container. This could break when - // we upgrade docker. - switch { - case strings.HasPrefix(state, statusRunningPrefix): - return kubecontainer.ContainerStateRunning - case strings.HasPrefix(state, statusExitedPrefix): - return kubecontainer.ContainerStateExited - default: - return kubecontainer.ContainerStateUnknown - } -} - -// Converts dockertypes.Container to kubecontainer.Container. -func toRuntimeContainer(c *dockertypes.Container) (*kubecontainer.Container, error) { - if c == nil { - return nil, fmt.Errorf("unable to convert a nil pointer to a runtime container") - } - - dockerName, hash, err := getDockerContainerNameInfo(c) - if err != nil { - return nil, err - } - - return &kubecontainer.Container{ - ID: kubecontainer.DockerID(c.ID).ContainerID(), - Name: dockerName.ContainerName, - Image: c.Image, - ImageID: c.ImageID, - Hash: hash, - // (random-liu) docker uses status to indicate whether a container is running or exited. - // However, in kubernetes we usually use state to indicate whether a container is running or exited, - // while use status to indicate the comprehensive status of the container. So we have different naming - // norm here. 
- State: mapState(c.Status), - }, nil -} - -// Converts dockertypes.Image to kubecontainer.Image. -func toRuntimeImage(image *dockertypes.Image) (*kubecontainer.Image, error) { - if image == nil { - return nil, fmt.Errorf("unable to convert a nil pointer to a runtime image") - } - - return &kubecontainer.Image{ - ID: image.ID, - RepoTags: image.RepoTags, - RepoDigests: image.RepoDigests, - Size: image.VirtualSize, - }, nil -} diff --git a/pkg/kubelet/dockertools/convert_test.go b/pkg/kubelet/dockertools/convert_test.go deleted file mode 100644 index 392a034acd8..00000000000 --- a/pkg/kubelet/dockertools/convert_test.go +++ /dev/null @@ -1,90 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "reflect" - "testing" - - dockertypes "github.com/docker/engine-api/types" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" -) - -func TestMapState(t *testing.T) { - testCases := []struct { - input string - expected kubecontainer.ContainerState - }{ - {input: "Up 5 hours", expected: kubecontainer.ContainerStateRunning}, - {input: "Exited (0) 2 hours ago", expected: kubecontainer.ContainerStateExited}, - {input: "Created", expected: kubecontainer.ContainerStateUnknown}, - {input: "Random string", expected: kubecontainer.ContainerStateUnknown}, - } - - for i, test := range testCases { - if actual := mapState(test.input); actual != test.expected { - t.Errorf("Test[%d]: expected %q, got %q", i, test.expected, actual) - } - } -} - -func TestToRuntimeContainer(t *testing.T) { - original := &dockertypes.Container{ - ID: "ab2cdf", - Image: "bar_image", - Names: []string{"/k8s_bar.5678_foo_ns_1234_42"}, - Status: "Up 5 hours", - } - expected := &kubecontainer.Container{ - ID: kubecontainer.ContainerID{Type: "docker", ID: "ab2cdf"}, - Name: "bar", - Image: "bar_image", - Hash: 0x5678, - State: kubecontainer.ContainerStateRunning, - } - - actual, err := toRuntimeContainer(original) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - if !reflect.DeepEqual(expected, actual) { - t.Errorf("expected %#v, got %#v", expected, actual) - } -} - -func TestToRuntimeImage(t *testing.T) { - original := &dockertypes.Image{ - ID: "aeeea", - RepoTags: []string{"abc", "def"}, - RepoDigests: []string{"123", "456"}, - VirtualSize: 1234, - } - expected := &kubecontainer.Image{ - ID: "aeeea", - RepoTags: []string{"abc", "def"}, - RepoDigests: []string{"123", "456"}, - Size: 1234, - } - - actual, err := toRuntimeImage(original) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - if !reflect.DeepEqual(expected, actual) { - t.Errorf("expected %#v, got %#v", expected, actual) - } -} diff --git a/pkg/kubelet/dockertools/docker.go b/pkg/kubelet/dockertools/docker.go index 10ede873a5c..6abd7195463 100644 --- a/pkg/kubelet/dockertools/docker.go +++ b/pkg/kubelet/dockertools/docker.go @@ -387,6 +387,8 @@ func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout, imagePullProgre // GetKubeletDockerContainers lists all container or just the 
running ones. // Returns a list of docker containers that we manage +// TODO: This function should be deleted after migrating +// test/e2e_node/garbage_collector_test.go off of it. func GetKubeletDockerContainers(client DockerInterface, allContainers bool) ([]*dockertypes.Container, error) { result := []*dockertypes.Container{} containers, err := client.ListContainers(dockertypes.ContainerListOptions{All: allContainers}) diff --git a/pkg/kubelet/dockertools/docker_manager.go b/pkg/kubelet/dockertools/docker_manager.go index 30e30477585..e2c3976c519 100644 --- a/pkg/kubelet/dockertools/docker_manager.go +++ b/pkg/kubelet/dockertools/docker_manager.go @@ -20,298 +20,67 @@ import ( "bytes" "crypto/md5" "encoding/json" - "errors" "fmt" "io" "io/ioutil" "os" "os/exec" - "path" "path/filepath" "strconv" "strings" - "sync" "time" - "github.com/armon/circbuf" dockertypes "github.com/docker/engine-api/types" - dockercontainer "github.com/docker/engine-api/types/container" - dockerstrslice "github.com/docker/engine-api/types/strslice" - dockerapiversion "github.com/docker/engine-api/types/versions" - dockernat "github.com/docker/go-connections/nat" "github.com/golang/glog" - cadvisorapi "github.com/google/cadvisor/info/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - kruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - kubetypes "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/client-go/tools/record" - "k8s.io/client-go/util/flowcontrol" - "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/v1" - v1helper "k8s.io/kubernetes/pkg/api/v1/helper" "k8s.io/kubernetes/pkg/client/unversioned/remotecommand" - "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - dockersecurity "k8s.io/kubernetes/pkg/kubelet/dockertools/securitycontext" - "k8s.io/kubernetes/pkg/kubelet/events" - "k8s.io/kubernetes/pkg/kubelet/images" - "k8s.io/kubernetes/pkg/kubelet/lifecycle" - "k8s.io/kubernetes/pkg/kubelet/metrics" - knetwork "k8s.io/kubernetes/pkg/kubelet/network" - "k8s.io/kubernetes/pkg/kubelet/network/hairpin" - proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" - "k8s.io/kubernetes/pkg/kubelet/qos" - "k8s.io/kubernetes/pkg/kubelet/types" - "k8s.io/kubernetes/pkg/kubelet/util/cache" - "k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/security/apparmor" - "k8s.io/kubernetes/pkg/securitycontext" - "k8s.io/kubernetes/pkg/util/oom" - "k8s.io/kubernetes/pkg/util/procfs" - "k8s.io/kubernetes/pkg/util/selinux" - utilstrings "k8s.io/kubernetes/pkg/util/strings" - "k8s.io/kubernetes/pkg/util/tail" - utilversion "k8s.io/kubernetes/pkg/util/version" ) const ( DockerType = "docker" dockerDefaultLoggingDriver = "json-file" - // https://docs.docker.com/engine/reference/api/docker_remote_api/ - // docker version should be at least 1.10.x - minimumDockerAPIVersion = "1.22" - - // Remote API version for docker daemon versions - // https://docs.docker.com/engine/reference/api/docker_remote_api/ - dockerV110APIVersion = "1.22" - - // ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified) - // we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative. - // hence, setting ndots to be 5. 
- ndotsDNSOption = "options ndots:5\n" - // In order to avoid unnecessary SIGKILLs, give every container a minimum grace - // period after SIGTERM. Docker will guarantee the termination, but SIGTERM is - // potentially dangerous. - // TODO: evaluate whether there are scenarios in which SIGKILL is preferable to - // SIGTERM for certain process types, which may justify setting this to 0. - minimumGracePeriodInSeconds = 2 - - DockerNetnsFmt = "/proc/%v/ns/net" - - // String used to detect docker host mode for various namespaces (e.g. - // networking). Must match the value returned by docker inspect -f - // '{{.HostConfig.NetworkMode}}'. - namespaceModeHost = "host" - - // The expiration time of version cache. - versionCacheTTL = 60 * time.Second - // Docker changed the API for specifying options in v1.11 SecurityOptSeparatorChangeVersion = "1.23.0" // Corresponds to docker 1.11.x SecurityOptSeparatorOld = ':' SecurityOptSeparatorNew = '=' + + // https://docs.docker.com/engine/reference/api/docker_remote_api/ + // docker version should be at least 1.10.x + minimumDockerAPIVersion = "1.22" + + statusRunningPrefix = "Up" + statusExitedPrefix = "Exited" + statusCreatedPrefix = "Created" + + ndotsDNSOption = "options ndots:5\n" ) var ( - // DockerManager implements the Runtime and DirectStreamingRuntime interfaces. - _ kubecontainer.Runtime = &DockerManager{} - _ kubecontainer.DirectStreamingRuntime = &DockerManager{} - - // TODO: make this a TTL based pull (if image older than X policy, pull) - podInfraContainerImagePullPolicy = v1.PullIfNotPresent - - // Default set of seccomp security options. defaultSeccompOpt = []dockerOpt{{"seccomp", "unconfined", ""}} ) -type DockerManager struct { - client DockerInterface - recorder record.EventRecorder - containerRefManager *kubecontainer.RefManager - os kubecontainer.OSInterface - machineInfo *cadvisorapi.MachineInfo - - // The image name of the pod infra container. - podInfraContainerImage string - // (Optional) Additional environment variables to be set for the pod infra container. - podInfraContainerEnv []v1.EnvVar - - // TODO(yifan): Record the pull failure so we can eliminate the image checking? - // Lower level docker image puller. - dockerPuller DockerPuller - - // wrapped image puller. - imagePuller images.ImageManager - - // cgroup driver used by Docker runtime. - cgroupDriver string - - // Directory of container logs. - containerLogsDir string - - // Network plugin manager. - network *knetwork.PluginManager - - // Health check results. - livenessManager proberesults.Manager - - // RuntimeHelper that wraps kubelet to generate runtime container options. - runtimeHelper kubecontainer.RuntimeHelper - - // Runner of lifecycle events. - runner kubecontainer.HandlerRunner - - // Handler used to execute commands in containers. - execHandler ExecHandler - - // Used to set OOM scores of processes. - oomAdjuster *oom.OOMAdjuster - - // Get information from /proc mount. - procFs procfs.ProcFSInterface - - // If true, enforce container cpu limits with CFS quota support - cpuCFSQuota bool - - // Container GC manager - containerGC *containerGC - - // Support for gathering custom metrics. - enableCustomMetrics bool - - // If true, the "hairpin mode" flag is set on container interfaces. - // A false value means the kubelet just backs off from setting it, - // it might already be true. - configureHairpinMode bool - - // Provides image stats - *imageStatsProvider - - // The version cache of docker daemon. 
- versionCache *cache.ObjectCache - - // Directory to host local seccomp profiles. - seccompProfileRoot string -} - -// A subset of the pod.Manager interface extracted for testing purposes. -type podGetter interface { - GetPodByUID(kubetypes.UID) (*v1.Pod, bool) -} - -func PodInfraContainerEnv(env map[string]string) kubecontainer.Option { - return func(rt kubecontainer.Runtime) { - dm := rt.(*DockerManager) - for k, v := range env { - dm.podInfraContainerEnv = append(dm.podInfraContainerEnv, v1.EnvVar{ - Name: k, - Value: v, - }) - } - } -} - -func NewDockerManager( - client DockerInterface, - recorder record.EventRecorder, - livenessManager proberesults.Manager, - containerRefManager *kubecontainer.RefManager, - podGetter podGetter, - machineInfo *cadvisorapi.MachineInfo, - podInfraContainerImage string, - qps float32, - burst int, - containerLogsDir string, - osInterface kubecontainer.OSInterface, - networkPlugin knetwork.NetworkPlugin, - runtimeHelper kubecontainer.RuntimeHelper, - httpClient types.HttpGetter, - execHandler ExecHandler, - oomAdjuster *oom.OOMAdjuster, - procFs procfs.ProcFSInterface, - cpuCFSQuota bool, - imageBackOff *flowcontrol.Backoff, - serializeImagePulls bool, - enableCustomMetrics bool, - hairpinMode bool, - seccompProfileRoot string, - options ...kubecontainer.Option) *DockerManager { - // Wrap the docker client with instrumentedDockerInterface - client = NewInstrumentedDockerInterface(client) - - // cgroup driver is only detectable in docker 1.11+ - // when the execution driver is not detectable, we provide the cgroupfs form. - // if your docker engine is configured to use the systemd cgroup driver, and you - // want to use pod level cgroups, you must be on docker 1.12+ to ensure cgroup-parent - // is converted appropriately. otherwise, docker will fail to launch the container - // and complain the cgroup name provided did not conform to systemd conventions. - var cgroupDriver string - dockerInfo, err := client.Info() +// GetImageRef returns the image digest if exists, or else returns the image ID. +// It is exported for reusing in dockershim. 
+func GetImageRef(client DockerInterface, image string) (string, error) { + img, err := client.InspectImageByRef(image) if err != nil { - glog.Errorf("Failed to execute Info() call to the Docker client: %v", err) - } else { - cgroupDriver = dockerInfo.CgroupDriver - glog.Infof("Setting cgroupDriver to %s", cgroupDriver) + return "", err + } + if img == nil { + return "", fmt.Errorf("unable to inspect image %s", image) } - dm := &DockerManager{ - client: client, - recorder: recorder, - containerRefManager: containerRefManager, - os: osInterface, - machineInfo: machineInfo, - podInfraContainerImage: podInfraContainerImage, - dockerPuller: newDockerPuller(client), - cgroupDriver: cgroupDriver, - containerLogsDir: containerLogsDir, - network: knetwork.NewPluginManager(networkPlugin), - livenessManager: livenessManager, - runtimeHelper: runtimeHelper, - execHandler: execHandler, - oomAdjuster: oomAdjuster, - procFs: procFs, - cpuCFSQuota: cpuCFSQuota, - enableCustomMetrics: enableCustomMetrics, - configureHairpinMode: hairpinMode, - imageStatsProvider: newImageStatsProvider(client), - seccompProfileRoot: seccompProfileRoot, - } - cmdRunner := kubecontainer.DirectStreamingRunner(dm) - dm.runner = lifecycle.NewHandlerRunner(httpClient, cmdRunner, dm) - dm.imagePuller = images.NewImageManager(kubecontainer.FilterEventRecorder(recorder), dm, imageBackOff, serializeImagePulls, qps, burst) - dm.containerGC = NewContainerGC(client, podGetter, dm.network, containerLogsDir) - - dm.versionCache = cache.NewObjectCache( - func() (interface{}, error) { - return dm.getVersionInfo() - }, - versionCacheTTL, - ) - - // apply optional settings.. - for _, optf := range options { - optf(dm) + // Returns the digest if it exist. + if len(img.RepoDigests) > 0 { + return img.RepoDigests[0], nil } - return dm -} - -// GetContainerLogs returns logs of a specific container. By -// default, it returns a snapshot of the container log. Set 'follow' to true to -// stream the log. Set 'follow' to false and specify the number of lines (e.g. -// "100" or "all") to tail the log. -// TODO: Make 'RawTerminal' option flagable. -func (dm *DockerManager) GetContainerLogs(pod *v1.Pod, containerID kubecontainer.ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) error { - container, err := dm.client.InspectContainer(containerID.ID) - if err != nil { - return err - } - return GetContainerLogs(dm.client, pod, containerID, logOptions, stdout, stderr, container.Config.Tty) + return img.ID, nil } // Temporarily export this function to share with dockershim. @@ -344,980 +113,6 @@ func GetContainerLogs(client DockerInterface, pod *v1.Pod, containerID kubeconta return client.Logs(containerID.ID, opts, sopts) } -var ( - // ErrContainerCannotRun is returned when a container is created, but cannot run properly - ErrContainerCannotRun = errors.New("ContainerCannotRun") -) - -// determineContainerIP determines the IP address of the given container. It is expected -// that the container passed is the infrastructure container of a pod and the responsibility -// of the caller to ensure that the correct container is passed. 
-func (dm *DockerManager) determineContainerIP(podNamespace, podName string, container *dockertypes.ContainerJSON) (string, error) { - result := getContainerIP(container) - - networkMode := getDockerNetworkMode(container) - isHostNetwork := networkMode == namespaceModeHost - - // For host networking or default network plugin, GetPodNetworkStatus doesn't work - if !isHostNetwork && dm.network.PluginName() != knetwork.DefaultPluginName { - netStatus, err := dm.network.GetPodNetworkStatus(podNamespace, podName, kubecontainer.DockerID(container.ID).ContainerID()) - if err != nil { - glog.Error(err) - return result, err - } else if netStatus != nil { - result = netStatus.IP.String() - } - } - - return result, nil -} - -func (dm *DockerManager) inspectContainer(id string, podName, podNamespace string) (*kubecontainer.ContainerStatus, string, error) { - var ip string - iResult, err := dm.client.InspectContainer(id) - if err != nil { - return nil, ip, err - } - glog.V(4).Infof("Container inspect result: %+v", *iResult) - - // TODO: Get k8s container name by parsing the docker name. This will be - // replaced by checking docker labels eventually. - dockerName, hash, err := ParseDockerName(iResult.Name) - if err != nil { - return nil, ip, fmt.Errorf("Unable to parse docker name %q", iResult.Name) - } - containerName := dockerName.ContainerName - - var containerInfo *labelledContainerInfo - containerInfo = getContainerInfoFromLabel(iResult.Config.Labels) - - parseTimestampError := func(label, s string) { - glog.Errorf("Failed to parse %q timestamp %q for container %q of pod %q", label, s, id, kubecontainer.BuildPodFullName(podName, podNamespace)) - } - var createdAt, startedAt, finishedAt time.Time - if createdAt, err = ParseDockerTimestamp(iResult.Created); err != nil { - parseTimestampError("Created", iResult.Created) - } - if startedAt, err = ParseDockerTimestamp(iResult.State.StartedAt); err != nil { - parseTimestampError("StartedAt", iResult.State.StartedAt) - } - if finishedAt, err = ParseDockerTimestamp(iResult.State.FinishedAt); err != nil { - parseTimestampError("FinishedAt", iResult.State.FinishedAt) - } - - // default to the image ID, but try and inspect for the RepoDigests - imageID := DockerPrefix + iResult.Image - imageName := iResult.Config.Image - imgInspectResult, err := dm.client.InspectImageByID(iResult.Image) - if err != nil { - utilruntime.HandleError(fmt.Errorf("unable to inspect docker image %q while inspecting docker container %q: %v", iResult.Image, containerName, err)) - } else { - if len(imgInspectResult.RepoDigests) > 1 { - glog.V(4).Infof("Container %q had more than one associated RepoDigest (%v), only using the first", containerName, imgInspectResult.RepoDigests) - } - - if len(imgInspectResult.RepoDigests) > 0 { - imageID = DockerPullablePrefix + imgInspectResult.RepoDigests[0] - } - - if len(imgInspectResult.RepoTags) > 0 { - imageName = imgInspectResult.RepoTags[0] - } - } - - status := kubecontainer.ContainerStatus{ - Name: containerName, - RestartCount: containerInfo.RestartCount, - Image: imageName, - ImageID: imageID, - ID: kubecontainer.DockerID(id).ContainerID(), - ExitCode: iResult.State.ExitCode, - CreatedAt: createdAt, - Hash: hash, - } - if iResult.State.Running { - // Container that are running, restarting and paused - status.State = kubecontainer.ContainerStateRunning - status.StartedAt = startedAt - if containerProvidesPodIP(dockerName.ContainerName) { - ip, err = dm.determineContainerIP(podNamespace, podName, iResult) - // Kubelet doesn't handle the 
network error scenario - if err != nil { - status.State = kubecontainer.ContainerStateUnknown - status.Message = fmt.Sprintf("Network error: %#v", err) - } - } - return &status, ip, nil - } - - // Find containers that have exited or failed to start. - if !finishedAt.IsZero() || iResult.State.ExitCode != 0 { - // Containers that are exited, dead or created (docker failed to start container) - // When a container fails to start State.ExitCode is non-zero, FinishedAt and StartedAt are both zero - reason := "" - message := iResult.State.Error - - // Note: An application might handle OOMKilled gracefully. - // In that case, the container is oom killed, but the exit - // code could be 0. - if iResult.State.OOMKilled { - reason = "OOMKilled" - } else if iResult.State.ExitCode == 0 { - reason = "Completed" - } else if !finishedAt.IsZero() { - reason = "Error" - } else { - // finishedAt is zero and ExitCode is nonZero occurs when docker fails to start the container - reason = ErrContainerCannotRun.Error() - // Adjust time to the time docker attempted to run the container, otherwise startedAt and finishedAt will be set to epoch, which is misleading - finishedAt = createdAt - startedAt = createdAt - } - - // retrieve the termination message from logs, file, or file with fallback to logs in case of failure - fallbackToLogs := containerInfo.TerminationMessagePolicy == v1.TerminationMessageFallbackToLogsOnError && (iResult.State.ExitCode != 0 || iResult.State.OOMKilled) - if msg := getTerminationMessage(dm.c, iResult, containerInfo.TerminationMessagePath, fallbackToLogs); len(msg) > 0 { - message = msg - } - - status.State = kubecontainer.ContainerStateExited - status.Message = message - status.Reason = reason - status.StartedAt = startedAt - status.FinishedAt = finishedAt - } else { - // Non-running containers that are created (not yet started or kubelet failed before calling - // start container function etc.) Kubelet doesn't handle these scenarios yet. - status.State = kubecontainer.ContainerStateUnknown - } - return &status, "", nil -} - -func getTerminationMessage(c DockerInterface, iResult *dockertypes.ContainerJSON, terminationMessagePath string, fallbackToLogs bool) string { - if len(terminationMessagePath) != 0 { - for _, mount := range iResult.Mounts { - if mount.Destination != terminationMessagePath { - continue - } - path := mount.Source - data, _, err := tail.ReadAtMost(path, kubecontainer.MaxContainerTerminationMessageLength) - if err != nil { - return fmt.Sprintf("Error on reading termination log %s: %v", path, err) - } - if !fallbackToLogs || len(data) != 0 { - return string(data) - } - } - } - if !fallbackToLogs { - return "" - } - - return readLastStringFromContainerLogs(c, iResult.Name) -} - -// readLastStringFromContainerLogs attempts to a certain amount from the end of the logs for containerName. -// It will attempt to avoid reading excessive logs from the server, which may result in underestimating the amount -// of logs to fetch (such that the length of the response message is < max). 
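// Illustrative sketch, not part of this patch: the bounded-buffer pattern the
// function below relies on via github.com/armon/circbuf. Writes beyond the
// buffer's capacity discard the oldest bytes, so tailing a termination message
// out of the container logs can never grow without bound. The buffer size and
// strings here are hypothetical.
package main

import (
	"fmt"

	"github.com/armon/circbuf"
)

func main() {
	buf, _ := circbuf.NewBuffer(16) // keep at most the 16 most recent bytes
	fmt.Fprint(buf, "older log output that will be overwritten...")
	fmt.Fprint(buf, "final lines")
	fmt.Println(buf.String()) // only the most recent 16 bytes remain
}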
-func readLastStringFromContainerLogs(c DockerInterface, containerName string) string { - logOptions := dockertypes.ContainerLogsOptions{ - ShowStdout: true, - ShowStderr: true, - } - buf, _ := circbuf.NewBuffer(kubecontainer.MaxContainerTerminationMessageLogLength) - streamOptions := StreamOptions{ - ErrorStream: buf, - OutputStream: buf, - } - logOptions.Tail = strconv.FormatInt(kubecontainer.MaxContainerTerminationMessageLogLines, 10) - if err := c.Logs(containerName, logOptions, streamOptions); err != nil { - return fmt.Sprintf("Error on reading termination message from logs: %v", err) - } - return buf.String() -} - -// makeEnvList converts EnvVar list to a list of strings, in the form of -// '=', which can be understood by docker. -func makeEnvList(envs []kubecontainer.EnvVar) (result []string) { - for _, env := range envs { - result = append(result, fmt.Sprintf("%s=%s", env.Name, env.Value)) - } - return -} - -// makeMountBindings converts the mount list to a list of strings that -// can be understood by docker. -// Each element in the string is in the form of: -// ':', or -// '::ro', if the path is read only, or -// '::Z', if the volume requires SELinux -// relabeling -func makeMountBindings(mounts []kubecontainer.Mount) (result []string) { - for _, m := range mounts { - bind := fmt.Sprintf("%s:%s", m.HostPath, m.ContainerPath) - if m.ReadOnly { - bind += ":ro" - } - if m.SELinuxRelabel && selinux.SELinuxEnabled() { - if m.ReadOnly { - bind += ",Z" - } else { - bind += ":Z" - } - - } - result = append(result, bind) - } - return -} - -func makePortsAndBindings(portMappings []kubecontainer.PortMapping) (map[dockernat.Port]struct{}, map[dockernat.Port][]dockernat.PortBinding) { - exposedPorts := map[dockernat.Port]struct{}{} - portBindings := map[dockernat.Port][]dockernat.PortBinding{} - for _, port := range portMappings { - exteriorPort := port.HostPort - if exteriorPort == 0 { - // No need to do port binding when HostPort is not specified - continue - } - interiorPort := port.ContainerPort - // Some of this port stuff is under-documented voodoo. 
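// Illustrative sketch, not part of this patch: the shape of the maps this
// function builds, using the same go-connections/nat types. The concrete port
// numbers are hypothetical: a PortMapping with ContainerPort 80, HostPort 8080
// and protocol TCP becomes the exposed-port key "80/tcp" plus one host binding.
package main

import (
	"fmt"

	dockernat "github.com/docker/go-connections/nat"
)

func main() {
	dockerPort := dockernat.Port("80/tcp")
	exposedPorts := map[dockernat.Port]struct{}{dockerPort: {}}
	portBindings := map[dockernat.Port][]dockernat.PortBinding{
		dockerPort: {{HostIP: "", HostPort: "8080"}},
	}
	fmt.Println(len(exposedPorts), portBindings[dockerPort][0].HostPort) // 1 8080
}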
- // See http://stackoverflow.com/questions/20428302/binding-a-port-to-a-host-interface-using-the-rest-api - var protocol string - switch strings.ToUpper(string(port.Protocol)) { - case "UDP": - protocol = "/udp" - case "TCP": - protocol = "/tcp" - default: - glog.Warningf("Unknown protocol %q: defaulting to TCP", port.Protocol) - protocol = "/tcp" - } - - dockerPort := dockernat.Port(strconv.Itoa(interiorPort) + protocol) - exposedPorts[dockerPort] = struct{}{} - - hostBinding := dockernat.PortBinding{ - HostPort: strconv.Itoa(exteriorPort), - HostIP: port.HostIP, - } - - // Allow multiple host ports bind to same docker port - if existedBindings, ok := portBindings[dockerPort]; ok { - // If a docker port already map to a host port, just append the host ports - portBindings[dockerPort] = append(existedBindings, hostBinding) - } else { - // Otherwise, it's fresh new port binding - portBindings[dockerPort] = []dockernat.PortBinding{ - hostBinding, - } - } - } - return exposedPorts, portBindings -} - -func (dm *DockerManager) runContainer( - pod *v1.Pod, - container *v1.Container, - opts *kubecontainer.RunContainerOptions, - ref *v1.ObjectReference, - imageRef string, - netMode string, - ipcMode string, - utsMode string, - pidMode string, - restartCount int, - oomScoreAdj int) (kubecontainer.ContainerID, error) { - - dockerName := KubeletContainerName{ - PodFullName: kubecontainer.GetPodFullName(pod), - PodUID: pod.UID, - ContainerName: container.Name, - } - - securityOpts, err := dm.getSecurityOpts(pod, container.Name) - if err != nil { - return kubecontainer.ContainerID{}, err - } - optSeparator, err := dm.getDockerOptSeparator() - if err != nil { - return kubecontainer.ContainerID{}, err - } - fmtSecurityOpts := FmtDockerOpts(securityOpts, optSeparator) - - // Pod information is recorded on the container as labels to preserve it in the event the pod is deleted - // while the Kubelet is down and there is no information available to recover the pod. - // TODO: keep these labels up to date if the pod changes - labels := newLabels(container, pod, restartCount, dm.enableCustomMetrics) - - // TODO(random-liu): Remove this when we start to use new labels for KillContainerInPod - if container.Lifecycle != nil && container.Lifecycle.PreStop != nil { - // TODO: This is kind of hacky, we should really just encode the bits we need. - // TODO: This is hacky because the Kubelet should be parameterized to encode a specific version - // and needs to be able to migrate this whenever we deprecate v1. Should be a member of DockerManager. - if data, err := kruntime.Encode(api.Codecs.LegacyCodec(schema.GroupVersion{Group: v1.GroupName, Version: "v1"}), pod); err == nil { - labels[kubernetesPodLabel] = string(data) - } else { - glog.Errorf("Failed to encode pod: %s for prestop hook", pod.Name) - } - } - memoryLimit := container.Resources.Limits.Memory().Value() - cpuRequest := container.Resources.Requests.Cpu() - cpuLimit := container.Resources.Limits.Cpu() - var cpuShares int64 - // If request is not specified, but limit is, we want request to default to limit. - // API server does this for new containers, but we repeat this logic in Kubelet - // for containers running on existing Kubernetes clusters. - if cpuRequest.IsZero() && !cpuLimit.IsZero() { - cpuShares = cm.MilliCPUToShares(cpuLimit.MilliValue()) - } else { - // if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number - // of CPU shares. 
- cpuShares = cm.MilliCPUToShares(cpuRequest.MilliValue()) - } - - // Set devices for container. - devices := make([]dockercontainer.DeviceMapping, len(opts.Devices)) - for i, device := range opts.Devices { - devices[i] = dockercontainer.DeviceMapping{ - PathOnHost: device.PathOnHost, - PathInContainer: device.PathInContainer, - CgroupPermissions: device.Permissions, - } - } - binds := makeMountBindings(opts.Mounts) - - // The reason we create and mount the log file in here (not in kubelet) is because - // the file's location depends on the ID of the container, and we need to create and - // mount the file before actually starting the container. - // TODO(yifan): Consider to pull this logic out since we might need to reuse it in - // other container runtime. - _, containerName, cid := BuildDockerName(dockerName, container) - if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 { - // Because the PodContainerDir contains pod uid and container name which is unique enough, - // here we just add a unique container id to make the path unique for different instances - // of the same container. - containerLogPath := path.Join(opts.PodContainerDir, cid) - fs, err := dm.os.Create(containerLogPath) - if err != nil { - // TODO: Clean up the previously created dir? return the error? - utilruntime.HandleError(fmt.Errorf("error creating termination-log file %q: %v", containerLogPath, err)) - } else { - fs.Close() // Close immediately; we're just doing a `touch` here - - // Chmod is needed because ioutil.WriteFile() ends up calling - // open(2) to create the file, so the final mode used is "mode & - // ~umask". But we want to make sure the specified mode is used - // in the file no matter what the umask is. - if err := dm.os.Chmod(containerLogPath, 0666); err != nil { - utilruntime.HandleError(fmt.Errorf("unable to set termination-log file permissions %q: %v", containerLogPath, err)) - } - - // Have docker relabel the termination log path if SELinux is - // enabled. 
- b := fmt.Sprintf("%s:%s", containerLogPath, container.TerminationMessagePath) - if selinux.SELinuxEnabled() { - b += ":Z" - } - binds = append(binds, b) - } - } - - userNsMode := "" - if opts.EnableHostUserNamespace { - userNsMode = "host" - } - - hc := &dockercontainer.HostConfig{ - Binds: binds, - NetworkMode: dockercontainer.NetworkMode(netMode), - IpcMode: dockercontainer.IpcMode(ipcMode), - UTSMode: dockercontainer.UTSMode(utsMode), - PidMode: dockercontainer.PidMode(pidMode), - UsernsMode: dockercontainer.UsernsMode(userNsMode), - ReadonlyRootfs: readOnlyRootFilesystem(container), - Resources: dockercontainer.Resources{ - Memory: memoryLimit, - MemorySwap: -1, - CPUShares: cpuShares, - Devices: devices, - }, - SecurityOpt: fmtSecurityOpts, - } - - updateHostConfig(hc, opts) - - // Set sysctls if requested - if container.Name == PodInfraContainerName { - sysctls, unsafeSysctls, err := v1helper.SysctlsFromPodAnnotations(pod.Annotations) - if err != nil { - dm.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err) - return kubecontainer.ContainerID{}, err - } - if len(sysctls)+len(unsafeSysctls) > 0 { - hc.Sysctls = make(map[string]string, len(sysctls)+len(unsafeSysctls)) - for _, c := range sysctls { - hc.Sysctls[c.Name] = c.Value - } - for _, c := range unsafeSysctls { - hc.Sysctls[c.Name] = c.Value - } - } - } - - // If current api version is equal to or newer than docker 1.10 requested, set OomScoreAdj to HostConfig - result, err := dm.checkDockerAPIVersion(dockerV110APIVersion) - if err != nil { - glog.Errorf("Failed to check docker api version: %v", err) - } else if result >= 0 { - hc.OomScoreAdj = oomScoreAdj - } - - if dm.cpuCFSQuota { - // if cpuLimit.Amount is nil, then the appropriate default value is returned to allow full usage of cpu resource. - cpuQuota, cpuPeriod := cm.MilliCPUToQuota(cpuLimit.MilliValue()) - - hc.CPUQuota = cpuQuota - hc.CPUPeriod = cpuPeriod - } - - if len(opts.CgroupParent) > 0 { - cgroupParent := opts.CgroupParent - // if docker uses the systemd cgroup driver, it expects *.slice style names for cgroup parent. - // if we configured kubelet to use --cgroup-driver=cgroupfs, and docker is configured to use systemd driver - // docker will fail to launch the container because the name we provide will not be a valid slice. - // this is a very good thing. 
- if dm.cgroupDriver == "systemd" { - cgroupParent, err = cm.ConvertCgroupFsNameToSystemd(opts.CgroupParent) - if err != nil { - return kubecontainer.ContainerID{}, err - } - } - hc.CgroupParent = cgroupParent - glog.V(3).Infof("Container %v/%v/%v: setting cgroup parent: %v", pod.Namespace, pod.Name, container.Name, hc.CgroupParent) - } - - dockerOpts := dockertypes.ContainerCreateConfig{ - Name: containerName, - Config: &dockercontainer.Config{ - Env: makeEnvList(opts.Envs), - Image: imageRef, - WorkingDir: container.WorkingDir, - Labels: labels, - // Interactive containers: - OpenStdin: container.Stdin, - StdinOnce: container.StdinOnce, - Tty: container.TTY, - }, - HostConfig: hc, - } - - // Set network configuration for infra-container - if container.Name == PodInfraContainerName { - setInfraContainerNetworkConfig(pod, netMode, opts, &dockerOpts) - } - - setEntrypointAndCommand(container, opts, dockerOpts) - - glog.V(3).Infof("Container %v/%v/%v: setting entrypoint \"%v\" and command \"%v\"", pod.Namespace, pod.Name, container.Name, dockerOpts.Config.Entrypoint, dockerOpts.Config.Cmd) - - supplementalGids := dm.runtimeHelper.GetExtraSupplementalGroupsForPod(pod) - securityContextProvider := dockersecurity.NewSimpleSecurityContextProvider(optSeparator) - securityContextProvider.ModifyContainerConfig(pod, container, dockerOpts.Config) - securityContextProvider.ModifyHostConfig(pod, container, dockerOpts.HostConfig, supplementalGids) - createResp, err := dm.client.CreateContainer(dockerOpts) - if err != nil { - dm.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err) - return kubecontainer.ContainerID{}, err - } - if len(createResp.Warnings) != 0 { - glog.V(2).Infof("Container %q of pod %q created with warnings: %v", container.Name, format.Pod(pod), createResp.Warnings) - } - - createdEventMsg := fmt.Sprintf("Created container with docker id %v", utilstrings.ShortenString(createResp.ID, 12)) - if len(securityOpts) > 0 { - var msgs []string - for _, opt := range securityOpts { - msg := opt.msg - if msg == "" { - msg = opt.value - } - msgs = append(msgs, fmt.Sprintf("%s=%s", opt.key, truncateMsg(msg, 256))) - } - createdEventMsg = fmt.Sprintf("%s; Security:[%s]", createdEventMsg, strings.Join(msgs, " ")) - } - dm.recorder.Eventf(ref, v1.EventTypeNormal, events.CreatedContainer, createdEventMsg) - - if err = dm.client.StartContainer(createResp.ID); err != nil { - dm.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToStartContainer, - "Failed to start container with docker id %v with error: %v", utilstrings.ShortenString(createResp.ID, 12), err) - return kubecontainer.ContainerID{}, err - } - dm.recorder.Eventf(ref, v1.EventTypeNormal, events.StartedContainer, "Started container with docker id %v", utilstrings.ShortenString(createResp.ID, 12)) - - return kubecontainer.DockerID(createResp.ID).ContainerID(), nil -} - -// setInfraContainerNetworkConfig sets the network configuration for the infra-container. We only set network configuration for infra-container, all -// the user containers will share the same network namespace with infra-container. 
-// NOTE: cluster dns settings aren't passed anymore to docker api in all cases, not only for pods with host network: -// the resolver conf will be overwritten after infra-container creation to override docker's behaviour -func setInfraContainerNetworkConfig(pod *v1.Pod, netMode string, opts *kubecontainer.RunContainerOptions, dockerOpts *dockertypes.ContainerCreateConfig) { - exposedPorts, portBindings := makePortsAndBindings(opts.PortMappings) - dockerOpts.Config.ExposedPorts = exposedPorts - dockerOpts.HostConfig.PortBindings = dockernat.PortMap(portBindings) - - if netMode != namespaceModeHost { - dockerOpts.Config.Hostname = opts.Hostname - } -} - -func setEntrypointAndCommand(container *v1.Container, opts *kubecontainer.RunContainerOptions, dockerOpts dockertypes.ContainerCreateConfig) { - command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs) - - dockerOpts.Config.Entrypoint = dockerstrslice.StrSlice(command) - dockerOpts.Config.Cmd = dockerstrslice.StrSlice(args) -} - -// A helper function to get the KubeletContainerName and hash from a docker -// container. -func getDockerContainerNameInfo(c *dockertypes.Container) (*KubeletContainerName, uint64, error) { - if len(c.Names) == 0 { - return nil, 0, fmt.Errorf("cannot parse empty docker container name: %#v", c.Names) - } - dockerName, hash, err := ParseDockerName(c.Names[0]) - if err != nil { - return nil, 0, fmt.Errorf("parse docker container name %q error: %v", c.Names[0], err) - } - return dockerName, hash, nil -} - -// Get pod UID, name, and namespace by examining the container names. -func getPodInfoFromContainer(c *dockertypes.Container) (kubetypes.UID, string, string, error) { - dockerName, _, err := getDockerContainerNameInfo(c) - if err != nil { - return kubetypes.UID(""), "", "", err - } - name, namespace, err := kubecontainer.ParsePodFullName(dockerName.PodFullName) - if err != nil { - return kubetypes.UID(""), "", "", fmt.Errorf("parse pod full name %q error: %v", dockerName.PodFullName, err) - } - return dockerName.PodUID, name, namespace, nil -} - -// GetContainers returns a list of running containers if |all| is false; -// otherwise, it returns all containers. -func (dm *DockerManager) GetContainers(all bool) ([]*kubecontainer.Container, error) { - containers, err := GetKubeletDockerContainers(dm.client, all) - if err != nil { - return nil, err - } - // Convert DockerContainers to []*kubecontainer.Container - result := make([]*kubecontainer.Container, 0, len(containers)) - for _, c := range containers { - converted, err := toRuntimeContainer(c) - if err != nil { - glog.Errorf("Error examining the container %v: %v", c.ID, err) - continue - } - result = append(result, converted) - } - return result, nil -} - -func (dm *DockerManager) GetPods(all bool) ([]*kubecontainer.Pod, error) { - start := time.Now() - defer func() { - metrics.ContainerManagerLatency.WithLabelValues("GetPods").Observe(metrics.SinceInMicroseconds(start)) - }() - pods := make(map[kubetypes.UID]*kubecontainer.Pod) - var result []*kubecontainer.Pod - - containers, err := GetKubeletDockerContainers(dm.client, all) - if err != nil { - return nil, err - } - - // Group containers by pod. 
- for _, c := range containers { - converted, err := toRuntimeContainer(c) - if err != nil { - glog.Errorf("Error examining the container %v: %v", c.ID, err) - continue - } - - podUID, podName, podNamespace, err := getPodInfoFromContainer(c) - if err != nil { - glog.Errorf("Error examining the container %v: %v", c.ID, err) - continue - } - - pod, found := pods[podUID] - if !found { - pod = &kubecontainer.Pod{ - ID: podUID, - Name: podName, - Namespace: podNamespace, - } - pods[podUID] = pod - } - pod.Containers = append(pod.Containers, converted) - } - - // Convert map to list. - for _, p := range pods { - result = append(result, p) - } - return result, nil -} - -// List all images in the local storage. -func (dm *DockerManager) ListImages() ([]kubecontainer.Image, error) { - var images []kubecontainer.Image - - dockerImages, err := dm.client.ListImages(dockertypes.ImageListOptions{}) - if err != nil { - return images, err - } - - for _, di := range dockerImages { - image, err := toRuntimeImage(&di) - if err != nil { - continue - } - images = append(images, *image) - } - return images, nil -} - -// GetImageRef returns the image digest if exists, or else returns the image ID. -// It is exported for reusing in dockershim. -func GetImageRef(client DockerInterface, image string) (string, error) { - img, err := client.InspectImageByRef(image) - if err != nil { - return "", err - } - if img == nil { - return "", fmt.Errorf("unable to inspect image %s", image) - } - - // Returns the digest if it exist. - if len(img.RepoDigests) > 0 { - return img.RepoDigests[0], nil - } - - return img.ID, nil -} - -// PullImage pulls an image from network to local storage. -func (dm *DockerManager) PullImage(image kubecontainer.ImageSpec, secrets []v1.Secret) (string, error) { - err := dm.dockerPuller.Pull(image.Image, secrets) - if err != nil { - return "", err - } - - return GetImageRef(dm.client, image.Image) -} - -// GetImageRef gets the reference (digest or ID) of the image which has already been in -// the local storage. It returns ("", nil) if the image isn't in the local storage. -func (dm *DockerManager) GetImageRef(image kubecontainer.ImageSpec) (string, error) { - return dm.dockerPuller.GetImageRef(image.Image) -} - -// Removes the specified image. -func (dm *DockerManager) RemoveImage(image kubecontainer.ImageSpec) error { - // If the image has multiple tags, we need to remove all the tags - if inspectImage, err := dm.client.InspectImageByID(image.Image); err == nil && len(inspectImage.RepoTags) > 1 { - for _, tag := range inspectImage.RepoTags { - if _, err := dm.client.RemoveImage(tag, dockertypes.ImageRemoveOptions{PruneChildren: true}); err != nil { - return err - } - } - return nil - } - - _, err := dm.client.RemoveImage(image.Image, dockertypes.ImageRemoveOptions{PruneChildren: true}) - return err -} - -// podInfraContainerChanged returns true if the pod infra container has changed. -func (dm *DockerManager) podInfraContainerChanged(pod *v1.Pod, podInfraContainerStatus *kubecontainer.ContainerStatus) (bool, error) { - var ports []v1.ContainerPort - - // Check network mode. 
- if kubecontainer.IsHostNetworkPod(pod) { - dockerPodInfraContainer, err := dm.client.InspectContainer(podInfraContainerStatus.ID.ID) - if err != nil { - return false, err - } - - networkMode := getDockerNetworkMode(dockerPodInfraContainer) - if networkMode != namespaceModeHost { - glog.V(4).Infof("host: %v, %v", pod.Spec.HostNetwork, networkMode) - return true, nil - } - } else if !dm.pluginDisablesDockerNetworking() { - // Docker only exports ports from the pod infra container. Let's - // collect all of the relevant ports and export them. - for _, container := range pod.Spec.InitContainers { - ports = append(ports, container.Ports...) - } - for _, container := range pod.Spec.Containers { - ports = append(ports, container.Ports...) - } - } - expectedPodInfraContainer := &v1.Container{ - Name: PodInfraContainerName, - Image: dm.podInfraContainerImage, - Ports: ports, - ImagePullPolicy: podInfraContainerImagePullPolicy, - Env: dm.podInfraContainerEnv, - } - return podInfraContainerStatus.Hash != kubecontainer.HashContainerLegacy(expectedPodInfraContainer), nil -} - -// determine if the container root should be a read only filesystem. -func readOnlyRootFilesystem(container *v1.Container) bool { - return container.SecurityContext != nil && container.SecurityContext.ReadOnlyRootFilesystem != nil && *container.SecurityContext.ReadOnlyRootFilesystem -} - -// container must not be nil -func getDockerNetworkMode(container *dockertypes.ContainerJSON) string { - if container.HostConfig != nil { - return string(container.HostConfig.NetworkMode) - } - return "" -} - -func (dm *DockerManager) pluginDisablesDockerNetworking() bool { - return dm.network.PluginName() == "cni" || dm.network.PluginName() == "kubenet" -} - -// newDockerVersion returns a semantically versioned docker version value -func newDockerVersion(version string) (*utilversion.Version, error) { - return utilversion.ParseSemantic(version) -} - -// apiVersion implements kubecontainer.Version interface by implementing -// Compare() and String(). It uses the compare function of engine-api to -// compare docker apiversions. -type apiVersion string - -func (v apiVersion) String() string { - return string(v) -} - -func (v apiVersion) Compare(other string) (int, error) { - if dockerapiversion.LessThan(string(v), other) { - return -1, nil - } else if dockerapiversion.GreaterThan(string(v), other) { - return 1, nil - } - return 0, nil -} - -func (dm *DockerManager) Type() string { - return DockerType -} - -func (dm *DockerManager) Version() (kubecontainer.Version, error) { - v, err := dm.client.Version() - if err != nil { - return nil, fmt.Errorf("docker: failed to get docker version: %v", err) - } - version, err := newDockerVersion(v.Version) - if err != nil { - return nil, fmt.Errorf("docker: failed to parse docker version %q: %v", v.Version, err) - } - return version, nil -} - -func (dm *DockerManager) APIVersion() (kubecontainer.Version, error) { - v, err := dm.client.Version() - if err != nil { - return nil, fmt.Errorf("docker: failed to get docker version: %v", err) - } - - return apiVersion(v.APIVersion), nil -} - -// Status returns error if docker daemon is unhealthy, nil otherwise. 
-// Now we do this by checking whether: -// 1) `docker version` works -// 2) docker version is compatible with minimum requirement -func (dm *DockerManager) Status() (*kubecontainer.RuntimeStatus, error) { - return nil, dm.checkVersionCompatibility() -} - -func (dm *DockerManager) checkVersionCompatibility() error { - version, err := dm.APIVersion() - if err != nil { - return err - } - // Verify the docker version. - result, err := version.Compare(minimumDockerAPIVersion) - if err != nil { - return fmt.Errorf("failed to compare current docker version %v with minimum support Docker version %q - %v", version, minimumDockerAPIVersion, err) - } - if result < 0 { - return fmt.Errorf("container runtime version is older than %s", minimumDockerAPIVersion) - } - return nil -} - -func (dm *DockerManager) getDockerOptSeparator() (rune, error) { - sep := SecurityOptSeparatorNew - if result, err := dm.checkDockerAPIVersion(SecurityOptSeparatorChangeVersion); err != nil { - return sep, err - } else if result < 0 { - sep = SecurityOptSeparatorOld - } - return sep, nil -} - -// FmtDockerOpts formats the docker security options using the given separator. -func FmtDockerOpts(opts []dockerOpt, sep rune) []string { - fmtOpts := make([]string, len(opts)) - for i, opt := range opts { - fmtOpts[i] = fmt.Sprintf("%s%c%s", opt.key, sep, opt.value) - } - return fmtOpts -} - -type dockerOpt struct { - // The key-value pair passed to docker. - key, value string - // The alternative value to use in log/event messages. - msg string -} - -// Expose key/value from dockertools -func (d dockerOpt) GetKV() (string, string) { - return d.key, d.value -} - -// Get the docker security options for seccomp. -func (dm *DockerManager) getSeccompOpts(pod *v1.Pod, ctrName string) ([]dockerOpt, error) { - version, err := dm.APIVersion() - if err != nil { - return nil, err - } - - // seccomp is only on docker versions >= v1.10 - if result, err := version.Compare(dockerV110APIVersion); err != nil { - return nil, err - } else if result < 0 { - return nil, nil // return early for Docker < 1.10 - } - - return GetSeccompOpts(pod.ObjectMeta.Annotations, ctrName, dm.seccompProfileRoot) -} - -// Temporarily export this function to share with dockershim. -// TODO: clean this up. -func GetSeccompOpts(annotations map[string]string, ctrName, profileRoot string) ([]dockerOpt, error) { - profile, profileOK := annotations[v1.SeccompContainerAnnotationKeyPrefix+ctrName] - if !profileOK { - // try the pod profile - profile, profileOK = annotations[v1.SeccompPodAnnotationKey] - if !profileOK { - // return early the default - return defaultSeccompOpt, nil - } - } - - if profile == "unconfined" { - // return early the default - return defaultSeccompOpt, nil - } - - if profile == "docker/default" { - // return nil so docker will load the default seccomp profile - return nil, nil - } - - if !strings.HasPrefix(profile, "localhost/") { - return nil, fmt.Errorf("unknown seccomp profile option: %s", profile) - } - - name := strings.TrimPrefix(profile, "localhost/") // by pod annotation validation, name is a valid subpath - fname := filepath.Join(profileRoot, filepath.FromSlash(name)) - file, err := ioutil.ReadFile(fname) - if err != nil { - return nil, fmt.Errorf("cannot load seccomp profile %q: %v", name, err) - } - - b := bytes.NewBuffer(nil) - if err := json.Compact(b, file); err != nil { - return nil, err - } - // Rather than the full profile, just put the filename & md5sum in the event log. 
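// A minimal sketch, not taken from the patch, of what FmtDockerOpts above
// produces from a list of dockerOpt values. The separator runes are an
// assumption here: getDockerOptSeparator picks between an older ':' form and
// a newer '=' form depending on the Docker API version.
package main

import "fmt"

type dockerOpt struct{ key, value, msg string }

func fmtDockerOpts(opts []dockerOpt, sep rune) []string {
	out := make([]string, len(opts))
	for i, opt := range opts {
		out[i] = fmt.Sprintf("%s%c%s", opt.key, sep, opt.value)
	}
	return out
}

func main() {
	opts := []dockerOpt{
		{key: "seccomp", value: "unconfined"},
		{key: "apparmor", value: "my-profile"},
	}
	fmt.Println(fmtDockerOpts(opts, '=')) // [seccomp=unconfined apparmor=my-profile]
	fmt.Println(fmtDockerOpts(opts, ':')) // [seccomp:unconfined apparmor:my-profile]
}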
- msg := fmt.Sprintf("%s(md5:%x)", name, md5.Sum(file)) - - return []dockerOpt{{"seccomp", b.String(), msg}}, nil -} - -// Get the docker security options for AppArmor. -func (dm *DockerManager) getAppArmorOpts(pod *v1.Pod, ctrName string) ([]dockerOpt, error) { - profile := apparmor.GetProfileNameFromPodAnnotations(pod.Annotations, ctrName) - return GetAppArmorOpts(profile) -} - -// Temporarily export this function to share with dockershim. -// TODO: clean this up. -func GetAppArmorOpts(profile string) ([]dockerOpt, error) { - if profile == "" || profile == apparmor.ProfileRuntimeDefault { - // The docker applies the default profile by default. - return nil, nil - } - - // Assume validation has already happened. - profileName := strings.TrimPrefix(profile, apparmor.ProfileNamePrefix) - return []dockerOpt{{"apparmor", profileName, ""}}, nil -} - -type dockerExitError struct { - Inspect *dockertypes.ContainerExecInspect -} - -func (d *dockerExitError) String() string { - return d.Error() -} - -func (d *dockerExitError) Error() string { - return fmt.Sprintf("Error executing in Docker Container: %d", d.Inspect.ExitCode) -} - -func (d *dockerExitError) Exited() bool { - return !d.Inspect.Running -} - -func (d *dockerExitError) ExitStatus() int { - return d.Inspect.ExitCode -} - -// ExecInContainer runs the command inside the container identified by containerID. -func (dm *DockerManager) ExecInContainer(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize, timeout time.Duration) error { - if dm.execHandler == nil { - return errors.New("unable to exec without an exec handler") - } - - container, err := dm.client.InspectContainer(containerID.ID) - if err != nil { - return err - } - if !container.State.Running { - return fmt.Errorf("container not running (%s)", container.ID) - } - - return dm.execHandler.ExecInContainer(dm.client, container, cmd, stdin, stdout, stderr, tty, resize, timeout) -} - -func (dm *DockerManager) AttachContainer(containerID kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { - return AttachContainer(dm.client, containerID.ID, stdin, stdout, stderr, tty, resize) -} - // Temporarily export this function to share with dockershim. // TODO: clean this up. func AttachContainer(client DockerInterface, containerID string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { @@ -1343,33 +138,6 @@ func AttachContainer(client DockerInterface, containerID string, stdin io.Reader return client.AttachToContainer(containerID, opts, sopts) } -func noPodInfraContainerError(podName, podNamespace string) error { - return fmt.Errorf("cannot find pod infra container in pod %q", kubecontainer.BuildPodFullName(podName, podNamespace)) -} - -// PortForward executes socat in the pod's network namespace and copies -// data between stream (representing the user's local connection on their -// computer) and the specified port in the container. -// -// TODO: -// - match cgroups of container -// - should we support nsenter + socat on the host? (current impl) -// - should we support nsenter + socat in a container, running with elevated privs and --pid=host? 
-func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port int32, stream io.ReadWriteCloser) error { - podInfraContainer := pod.FindContainerByName(PodInfraContainerName) - if podInfraContainer == nil { - return noPodInfraContainerError(pod.Name, pod.Namespace) - } - - return PortForward(dm.client, podInfraContainer.ID.ID, port, stream) -} - -// UpdatePodCIDR updates the podCIDR for the runtime. -// Currently no-ops, just implemented to satisfy the cri. -func (dm *DockerManager) UpdatePodCIDR(podCIDR string) error { - return nil -} - // Temporarily export this function to share with dockershim. func PortForward(client DockerInterface, podInfraContainerID string, port int32, stream io.ReadWriteCloser) error { container, err := client.InspectContainer(podInfraContainerID) @@ -1428,476 +196,115 @@ func PortForward(client DockerInterface, podInfraContainerID string, port int32, return nil } -// TODO(random-liu): Change running pod to pod status in the future. We can't do it now, because kubelet also uses this function without pod status. -// We can only deprecate this after refactoring kubelet. -// TODO(random-liu): After using pod status for KillPod(), we can also remove the kubernetesPodLabel, because all the needed information should have -// been extract from new labels and stored in pod status. -// only hard eviction scenarios should provide a grace period override, all other code paths must pass nil. -func (dm *DockerManager) KillPod(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error { - result := dm.killPodWithSyncResult(pod, runningPod, gracePeriodOverride) - return result.Error() +// Temporarily export this function to share with dockershim. +// TODO: clean this up. +func GetAppArmorOpts(profile string) ([]dockerOpt, error) { + if profile == "" || profile == apparmor.ProfileRuntimeDefault { + // The docker applies the default profile by default. + return nil, nil + } + + // Assume validation has already happened. + profileName := strings.TrimPrefix(profile, apparmor.ProfileNamePrefix) + return []dockerOpt{{"apparmor", profileName, ""}}, nil } -// NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted. -// runtimePod may contain either running or exited containers -func (dm *DockerManager) killPodWithSyncResult(pod *v1.Pod, runtimePod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) { - // Short circuit if there's nothing to kill. - if len(runtimePod.Containers) == 0 { - return - } - // Send the kills in parallel since they may take a long time. - // There may be len(runtimePod.Containers) or len(runtimePod.Containers)-1 of result in the channel - containerResults := make(chan *kubecontainer.SyncResult, len(runtimePod.Containers)) - wg := sync.WaitGroup{} - var ( - networkContainers []*kubecontainer.Container - networkSpecs []*v1.Container - ) - wg.Add(len(runtimePod.Containers)) - for _, container := range runtimePod.Containers { - go func(container *kubecontainer.Container) { - defer utilruntime.HandleCrash() - defer wg.Done() - - var containerSpec *v1.Container - if pod != nil { - for i, c := range pod.Spec.Containers { - if c.Name == container.Name { - containerSpec = &pod.Spec.Containers[i] - break - } - } - if containerSpec == nil { - for i, c := range pod.Spec.InitContainers { - if c.Name == container.Name { - containerSpec = &pod.Spec.InitContainers[i] - break - } - } - } - } - - // TODO: Handle this without signaling the pod infra container to - // adapt to the generic container runtime. 
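// A minimal sketch, not taken from the patch, of how the exported
// GetAppArmorOpts above maps a profile annotation to a docker security opt.
// The literal values used here ("runtime/default" for
// apparmor.ProfileRuntimeDefault and the "localhost/" prefix for
// apparmor.ProfileNamePrefix) are assumptions, not shown in this patch.
package main

import (
	"fmt"
	"strings"
)

type dockerOpt struct{ key, value, msg string }

func getAppArmorOpts(profile string) []dockerOpt {
	if profile == "" || profile == "runtime/default" {
		return nil // docker applies its own default profile
	}
	// validation of the profile name is assumed to have happened already
	return []dockerOpt{{"apparmor", strings.TrimPrefix(profile, "localhost/"), ""}}
}

func main() {
	fmt.Println(getAppArmorOpts(""))                     // [] -> defer to docker's default
	fmt.Println(getAppArmorOpts("localhost/my-profile")) // [{apparmor my-profile }]
}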
- if containerIsNetworked(container.Name) { - // Store the container runtime for later deletion. - // We do this so that PreStop handlers can run in the network namespace. - networkContainers = append(networkContainers, container) - networkSpecs = append(networkSpecs, containerSpec) - } else { - killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name) - err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.", gracePeriodOverride) - if err != nil { - killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error()) - glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", container.ID.ID, err, runtimePod.ID) - } - containerResults <- killContainerResult - } - }(container) - } - wg.Wait() - close(containerResults) - for containerResult := range containerResults { - result.AddSyncResult(containerResult) - } - - // Tear down any dead or running network/infra containers, but only kill - // those that are still running. - for i := range networkContainers { - networkContainer := networkContainers[i] - networkSpec := networkSpecs[i] - - teardownNetworkResult := kubecontainer.NewSyncResult(kubecontainer.TeardownNetwork, kubecontainer.BuildPodFullName(runtimePod.Name, runtimePod.Namespace)) - result.AddSyncResult(teardownNetworkResult) - - ins, err := dm.client.InspectContainer(networkContainer.ID.ID) - if err != nil { - err = fmt.Errorf("Error inspecting container %v: %v", networkContainer.ID.ID, err) - glog.Error(err) - teardownNetworkResult.Fail(kubecontainer.ErrTeardownNetwork, err.Error()) - continue - } - - if getDockerNetworkMode(ins) != namespaceModeHost { - if err := dm.network.TearDownPod(runtimePod.Namespace, runtimePod.Name, networkContainer.ID); err != nil { - teardownNetworkResult.Fail(kubecontainer.ErrTeardownNetwork, err.Error()) - glog.Error(err) - } - } - if networkContainer.State == kubecontainer.ContainerStateRunning { - killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, networkContainer.Name) - result.AddSyncResult(killContainerResult) - if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod.", gracePeriodOverride); err != nil { - killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error()) - glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", networkContainer.ID.ID, err, runtimePod.ID) - } - } - } - return -} - -// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod, -// and will attempt to lookup the other information if missing. -func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *v1.Container, pod *v1.Pod, message string, gracePeriodOverride *int64) error { - switch { - case containerID.IsEmpty(): - // Locate the container. 
- pods, err := dm.GetPods(false) - if err != nil { - return err - } - targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) - targetContainer := targetPod.FindContainerByName(container.Name) - if targetContainer == nil { - return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) - } - containerID = targetContainer.ID - - case container == nil || pod == nil: - // Read information about the container from labels - inspect, err := dm.client.InspectContainer(containerID.ID) - if err != nil { - return err - } - storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect) - if cerr != nil { - glog.Errorf("unable to access pod data from container: %v", cerr) - } - if container == nil { - container = storedContainer - } - if pod == nil { - pod = storedPod - } - } - return dm.killContainer(containerID, container, pod, message, gracePeriodOverride) -} - -// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke -// KillContainerInPod if information must be retrieved first. It is only valid to provide a grace period override -// during hard eviction scenarios. All other code paths in kubelet must never provide a grace period override otherwise -// data corruption could occur in the end-user application. -func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *v1.Container, pod *v1.Pod, reason string, gracePeriodOverride *int64) error { - ID := containerID.ID - name := ID - if container != nil { - name = fmt.Sprintf("%s %s", name, container.Name) - } - if pod != nil { - name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name) - } - - gracePeriod := int64(minimumGracePeriodInSeconds) - if pod != nil { - switch { - case pod.DeletionGracePeriodSeconds != nil: - gracePeriod = *pod.DeletionGracePeriodSeconds - case pod.Spec.TerminationGracePeriodSeconds != nil: - gracePeriod = *pod.Spec.TerminationGracePeriodSeconds - } - } - glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod) - start := metav1.Now() - - if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil { - glog.V(4).Infof("Running preStop hook for container %q", name) - done := make(chan struct{}) - go func() { - defer close(done) - defer utilruntime.HandleCrash() - if msg, err := dm.runner.Run(containerID, pod, container, container.Lifecycle.PreStop); err != nil { - glog.Errorf("preStop hook for container %q failed: %v", name, err) - dm.generateFailedContainerEvent(containerID, pod.Name, events.FailedPreStopHook, msg) - } - }() - select { - case <-time.After(time.Duration(gracePeriod) * time.Second): - glog.Warningf("preStop hook for container %q did not complete in %d seconds", name, gracePeriod) - message := fmt.Sprintf("preStop hook for container %q did not complete in %d seconds", name, gracePeriod) - dm.generateFailedContainerEvent(containerID, pod.Name, events.UnfinishedPreStopHook, message) - case <-done: - glog.V(4).Infof("preStop hook for container %q completed", name) - } - gracePeriod -= int64(metav1.Now().Sub(start.Time).Seconds()) - } - - // if the caller did not specify a grace period override, we ensure that the grace period - // is not less than the minimal shutdown window to avoid unnecessary SIGKILLs. if a caller - // did provide an override, we always set the gracePeriod to that value. 
the only valid - // time to send an override is during eviction scenarios where we want to do a hard kill of - // a container because of resource exhaustion for incompressible resources (i.e. disk, memory). - if gracePeriodOverride == nil { - if gracePeriod < minimumGracePeriodInSeconds { - gracePeriod = minimumGracePeriodInSeconds - } - } else { - gracePeriod = *gracePeriodOverride - glog.V(2).Infof("Killing container %q, but using %d second grace period override", name, gracePeriod) - } - - err := dm.client.StopContainer(ID, int(gracePeriod)) - if err == nil { - glog.V(2).Infof("Container %q exited after %s", name, metav1.Now().Sub(start.Time)) - } else { - glog.Warningf("Container %q termination failed after %s: %v", name, metav1.Now().Sub(start.Time), err) - } - ref, ok := dm.containerRefManager.GetRef(containerID) - if !ok { - glog.Warningf("No ref for pod '%q'", name) - } else { - message := fmt.Sprintf("Killing container with docker id %v", utilstrings.ShortenString(ID, 12)) - if reason != "" { - message = fmt.Sprint(message, ": ", reason) - } - dm.recorder.Event(ref, v1.EventTypeNormal, events.KillingContainer, message) - dm.containerRefManager.ClearRef(containerID) - } - return err -} - -func (dm *DockerManager) generateFailedContainerEvent(containerID kubecontainer.ContainerID, podName, reason, message string) { - ref, ok := dm.containerRefManager.GetRef(containerID) - if !ok { - glog.Warningf("No ref for pod '%q'", podName) - return - } - dm.recorder.Event(ref, v1.EventTypeWarning, reason, message) -} - -var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container") - -// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels -func containerAndPodFromLabels(inspect *dockertypes.ContainerJSON) (pod *v1.Pod, container *v1.Container, err error) { - if inspect == nil || inspect.Config == nil || inspect.Config.Labels == nil { - return nil, nil, errNoPodOnContainer - } - labels := inspect.Config.Labels - - // the pod data may not be set - if body, found := labels[kubernetesPodLabel]; found { - pod = &v1.Pod{} - if err = kruntime.DecodeInto(api.Codecs.UniversalDecoder(), []byte(body), pod); err == nil { - name := labels[types.KubernetesContainerNameLabel] - for ix := range pod.Spec.Containers { - if pod.Spec.Containers[ix].Name == name { - container = &pod.Spec.Containers[ix] - break - } - } - if container == nil { - for ix := range pod.Spec.InitContainers { - if pod.Spec.InitContainers[ix].Name == name { - container = &pod.Spec.InitContainers[ix] - break - } - } - } - if container == nil { - err = fmt.Errorf("unable to find container %s in pod %v", name, pod) - } - } else { - pod = nil +// Temporarily export this function to share with dockershim. +// TODO: clean this up. 
+func GetSeccompOpts(annotations map[string]string, ctrName, profileRoot string) ([]dockerOpt, error) { + profile, profileOK := annotations[v1.SeccompContainerAnnotationKeyPrefix+ctrName] + if !profileOK { + // try the pod profile + profile, profileOK = annotations[v1.SeccompPodAnnotationKey] + if !profileOK { + // return early the default + return defaultSeccompOpt, nil } } - // attempt to find the default grace period if we didn't commit a pod, but set the generic metadata - // field (the one used by kill) - if pod == nil { - if period, ok := labels[kubernetesPodTerminationGracePeriodLabel]; ok { - if seconds, err := strconv.ParseInt(period, 10, 64); err == nil { - pod = &v1.Pod{} - pod.DeletionGracePeriodSeconds = &seconds - } - } + if profile == "unconfined" { + // return early the default + return defaultSeccompOpt, nil } - return -} - -func (dm *DockerManager) applyOOMScoreAdj(pod *v1.Pod, container *v1.Container, containerInfo *dockertypes.ContainerJSON) error { - if containerInfo.State.Pid == 0 { - // Container exited. We cannot do anything about it. Ignore this error. - glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID) - return nil + if profile == "docker/default" { + // return nil so docker will load the default seccomp profile + return nil, nil } - cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid) + if !strings.HasPrefix(profile, "localhost/") { + return nil, fmt.Errorf("unknown seccomp profile option: %s", profile) + } + + name := strings.TrimPrefix(profile, "localhost/") // by pod annotation validation, name is a valid subpath + fname := filepath.Join(profileRoot, filepath.FromSlash(name)) + file, err := ioutil.ReadFile(fname) if err != nil { - if err == os.ErrNotExist { - // Container exited. We cannot do anything about it. Ignore this error. - glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID) - return nil - } - return err + return nil, fmt.Errorf("cannot load seccomp profile %q: %v", name, err) } - oomScoreAdj := dm.calculateOomScoreAdj(pod, container) - if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil { - if err == os.ErrNotExist { - // Container exited. We cannot do anything about it. Ignore this error. - glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID) - return nil - } - return err + + b := bytes.NewBuffer(nil) + if err := json.Compact(b, file); err != nil { + return nil, err } - return nil + // Rather than the full profile, just put the filename & md5sum in the event log. + msg := fmt.Sprintf("%s(md5:%x)", name, md5.Sum(file)) + + return []dockerOpt{{"seccomp", b.String(), msg}}, nil } -// Run a single container from a pod. Returns the docker container ID -// If do not need to pass labels, just pass nil. 
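// A minimal sketch, not taken from the patch, of the profile lookup order that
// GetSeccompOpts above implements: a container-specific annotation wins over
// the pod-wide one, "unconfined" keeps the default opt, "docker/default"
// defers to docker, and only "localhost/" profiles are read from disk.
// The annotation keys below are stand-ins for
// v1.SeccompContainerAnnotationKeyPrefix and v1.SeccompPodAnnotationKey.
package main

import "fmt"

const (
	containerKeyPrefix = "container.seccomp.security.alpha.kubernetes.io/"
	podKey             = "seccomp.security.alpha.kubernetes.io/pod"
)

// resolveSeccompProfile returns the effective profile string for one container.
func resolveSeccompProfile(annotations map[string]string, ctrName string) string {
	if p, ok := annotations[containerKeyPrefix+ctrName]; ok {
		return p
	}
	if p, ok := annotations[podKey]; ok {
		return p
	}
	return "unconfined" // same effect as returning defaultSeccompOpt
}

func main() {
	ann := map[string]string{
		podKey:                     "docker/default",
		containerKeyPrefix + "app": "localhost/audit.json",
	}
	fmt.Println(resolveSeccompProfile(ann, "app"))     // localhost/audit.json
	fmt.Println(resolveSeccompProfile(ann, "sidecar")) // docker/default
}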
-func (dm *DockerManager) runContainerInPod(pod *v1.Pod, container *v1.Container, netMode, ipcMode, pidMode, podIP, imageRef string, restartCount int) (kubecontainer.ContainerID, error) { - start := time.Now() - defer func() { - metrics.ContainerManagerLatency.WithLabelValues("runContainerInPod").Observe(metrics.SinceInMicroseconds(start)) - }() - - ref, err := kubecontainer.GenerateContainerRef(pod, container) - if err != nil { - glog.Errorf("Can't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err) - } else { - glog.V(5).Infof("Generating ref for container %s: %#v", container.Name, ref) +// FmtDockerOpts formats the docker security options using the given separator. +func FmtDockerOpts(opts []dockerOpt, sep rune) []string { + fmtOpts := make([]string, len(opts)) + for i, opt := range opts { + fmtOpts[i] = fmt.Sprintf("%s%c%s", opt.key, sep, opt.value) } - - opts, useClusterFirstPolicy, err := dm.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) - if err != nil { - return kubecontainer.ContainerID{}, fmt.Errorf("GenerateRunContainerOptions: %v", err) - } - - utsMode := "" - if kubecontainer.IsHostNetworkPod(pod) { - utsMode = namespaceModeHost - } - - oomScoreAdj := dm.calculateOomScoreAdj(pod, container) - - id, err := dm.runContainer(pod, container, opts, ref, imageRef, netMode, ipcMode, utsMode, pidMode, restartCount, oomScoreAdj) - if err != nil { - return kubecontainer.ContainerID{}, fmt.Errorf("runContainer: %v", err) - } - - // Remember this reference so we can report events about this container - if ref != nil { - dm.containerRefManager.SetRef(id, ref) - } - - if container.Lifecycle != nil && container.Lifecycle.PostStart != nil { - msg, handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart) - if handlerErr != nil { - err := fmt.Errorf("PostStart handler: %v", handlerErr) - dm.generateFailedContainerEvent(id, pod.Name, events.FailedPostStartHook, msg) - dm.KillContainerInPod(id, container, pod, err.Error(), nil) - return kubecontainer.ContainerID{}, err - } - } - - // Container information is used in adjusting OOM scores, adding ndots and getting the logPath. - containerInfo, err := dm.client.InspectContainer(id.ID) - if err != nil { - return kubecontainer.ContainerID{}, fmt.Errorf("InspectContainer: %v", err) - } - - containerLogFile := containerInfo.LogPath - if containerLogFile != "" { - // Create a symbolic link to the Docker container log file using a name which captures the - // full pod name, the container name and the Docker container ID. Cluster level logging will - // capture these symbolic filenames which can be used for search terms in Elasticsearch or for - // labels for Cloud Logging. 
- symlinkFile := LogSymlink(dm.containerLogsDir, kubecontainer.GetPodFullName(pod), container.Name, id.ID) - if err = dm.os.Symlink(containerLogFile, symlinkFile); err != nil { - glog.Errorf("Failed to create symbolic link to the log file of pod %q container %q: %v", format.Pod(pod), container.Name, err) - } - } else { - dockerLoggingDriver := "" - dockerInfo, err := dm.client.Info() - if err != nil { - glog.Errorf("Failed to execute Info() call to the Docker client: %v", err) - } else { - dockerLoggingDriver = dockerInfo.LoggingDriver - glog.V(10).Infof("Docker logging driver is %s", dockerLoggingDriver) - } - - if dockerLoggingDriver == dockerDefaultLoggingDriver { - glog.Errorf("Cannot create symbolic link because container log file doesn't exist!") - } else { - glog.V(5).Infof("Unsupported logging driver: %s", dockerLoggingDriver) - } - } - - // Check if current docker version is higher than 1.10. Otherwise, we have to apply OOMScoreAdj instead of using docker API. - // TODO: Remove this logic after we stop supporting docker version < 1.10. - if err = dm.applyOOMScoreAdjIfNeeded(pod, container, containerInfo); err != nil { - return kubecontainer.ContainerID{}, err - } - - // Re-write resolv.conf file generated by docker. - // NOTE: cluster dns settings aren't passed anymore to docker api in all cases, not only for pods with host network: - // the resolver conf will be overwritten after infra-container creation to override docker's behaviour - // This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod. - // we modify it when the pause container is created since it is the first container created in the pod since it holds - // the networking namespace. - if container.Name == PodInfraContainerName { - if err := RewriteResolvFile(containerInfo.ResolvConfPath, opts.DNS, opts.DNSSearch, useClusterFirstPolicy); err != nil { - return kubecontainer.ContainerID{}, err - } - } - - return id, err + return fmtOpts } -func (dm *DockerManager) applyOOMScoreAdjIfNeeded(pod *v1.Pod, container *v1.Container, containerInfo *dockertypes.ContainerJSON) error { - // Compare current API version with expected api version. - result, err := dm.checkDockerAPIVersion(dockerV110APIVersion) - if err != nil { - return fmt.Errorf("Failed to check docker api version: %v", err) - } - // If current api version is older than OOMScoreAdj requested, use the old way. - if result < 0 { - if err := dm.applyOOMScoreAdj(pod, container, containerInfo); err != nil { - return fmt.Errorf("Failed to apply oom-score-adj to container %q- %v", containerInfo.Name, err) - } - } - - return nil +type dockerOpt struct { + // The key-value pair passed to docker. + key, value string + // The alternative value to use in log/event messages. + msg string } -func (dm *DockerManager) calculateOomScoreAdj(pod *v1.Pod, container *v1.Container) int { - // Set OOM score of the container based on the priority of the container. - // Processes in lower-priority pods should be killed first if the system runs out of memory. - // The main pod infrastructure container is considered high priority, since if it is killed the - // whole pod will die. 
- var oomScoreAdj int - if container.Name == PodInfraContainerName { - oomScoreAdj = qos.PodInfraOOMAdj - } else { - oomScoreAdj = qos.GetContainerOOMScoreAdjust(pod, container, int64(dm.machineInfo.MemoryCapacity)) - - } - - return oomScoreAdj +// Expose key/value from dockertools +func (d dockerOpt) GetKV() (string, string) { + return d.key, d.value } -// versionInfo wraps api version and daemon version. -type versionInfo struct { - apiVersion kubecontainer.Version - daemonVersion kubecontainer.Version +// GetUserFromImageUser splits the user out of an user:group string. +func GetUserFromImageUser(id string) string { + if id == "" { + return id + } + // split instances where the id may contain user:group + if strings.Contains(id, ":") { + return strings.Split(id, ":")[0] + } + // no group, just return the id + return id } -// checkDockerAPIVersion checks current docker API version against expected version. -// Return: -// 1 : newer than expected version -// -1: older than expected version -// 0 : same version -func (dm *DockerManager) checkDockerAPIVersion(expectedVersion string) (int, error) { - value, err := dm.versionCache.Get(dm.machineInfo.MachineID) - if err != nil { - return 0, err - } - apiVersion := value.(versionInfo).apiVersion - result, err := apiVersion.Compare(expectedVersion) - if err != nil { - return 0, fmt.Errorf("Failed to compare current docker api version %v with OOMScoreAdj supported Docker version %q - %v", - apiVersion, expectedVersion, err) - } - return result, nil +type dockerExitError struct { + Inspect *dockertypes.ContainerExecInspect +} + +func (d *dockerExitError) String() string { + return d.Error() +} + +func (d *dockerExitError) Error() string { + return fmt.Sprintf("Error executing in Docker Container: %d", d.Inspect.ExitCode) +} + +func (d *dockerExitError) Exited() bool { + return !d.Inspect.Running +} + +func (d *dockerExitError) ExitStatus() int { + return d.Inspect.ExitCode } // RewriteResolvFile rewrites resolv.conf file generated by docker. @@ -1950,831 +357,3 @@ func rewriteFile(filePath, stringToWrite string) error { _, err = f.WriteString(stringToWrite) return err } - -// createPodInfraContainer starts the pod infra container for a pod. Returns the docker container ID of the newly created container. -// If any error occurs in this function, it will return a brief error and a detailed error message. -func (dm *DockerManager) createPodInfraContainer(pod *v1.Pod) (kubecontainer.DockerID, error, string) { - start := time.Now() - defer func() { - metrics.ContainerManagerLatency.WithLabelValues("createPodInfraContainer").Observe(metrics.SinceInMicroseconds(start)) - }() - // Use host networking if specified. - netNamespace := "" - var ports []v1.ContainerPort - - if kubecontainer.IsHostNetworkPod(pod) { - netNamespace = namespaceModeHost - } else if dm.pluginDisablesDockerNetworking() { - netNamespace = "none" - } else { - // Docker only exports ports from the pod infra container. Let's - // collect all of the relevant ports and export them. - for _, container := range pod.Spec.InitContainers { - ports = append(ports, container.Ports...) - } - for _, container := range pod.Spec.Containers { - ports = append(ports, container.Ports...) - } - } - - container := &v1.Container{ - Name: PodInfraContainerName, - Image: dm.podInfraContainerImage, - Ports: ports, - ImagePullPolicy: podInfraContainerImagePullPolicy, - Env: dm.podInfraContainerEnv, - } - - // No pod secrets for the infra container. 
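// A short usage sketch, not taken from the patch, for the exported
// GetUserFromImageUser above: it keeps only the user part of an image's
// USER value when a group is also present. The import path is the package
// this patch edits.
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/dockertools"
)

func main() {
	fmt.Println(dockertools.GetUserFromImageUser("1000:1000")) // 1000
	fmt.Println(dockertools.GetUserFromImageUser("nginx"))     // nginx
	fmt.Println(dockertools.GetUserFromImageUser(""))          // (empty string)
}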
- // The message isn't needed for the Infra container - imageRef, msg, err := dm.imagePuller.EnsureImageExists(pod, container, nil) - if err != nil { - return "", err, msg - } - - // Currently we don't care about restart count of infra container, just set it to 0. - id, err := dm.runContainerInPod(pod, container, netNamespace, getIPCMode(pod), getPidMode(pod), "", imageRef, 0) - if err != nil { - return "", kubecontainer.ErrRunContainer, err.Error() - } - - return kubecontainer.DockerID(id.ID), nil, "" -} - -// Structure keeping information on changes that need to happen for a pod. The semantics is as follows: -// - startInfraContainer is true if new Infra Containers have to be started and old one (if running) killed. -// Additionally if it is true then containersToKeep have to be empty -// - infraContainerId have to be set if and only if startInfraContainer is false. It stores dockerID of running Infra Container -// - containersToStart keeps indices of Specs of containers that have to be started and reasons why containers will be started. -// - containersToKeep stores mapping from dockerIDs of running containers to indices of their Specs for containers that -// should be kept running. If startInfraContainer is false then it contains an entry for infraContainerId (mapped to -1). -// It shouldn't be the case where containersToStart is empty and containersToKeep contains only infraContainerId. In such case -// Infra Container should be killed, hence it's removed from this map. -// - all init containers are stored in initContainersToKeep -// - all running containers which are NOT contained in containersToKeep and initContainersToKeep should be killed. -type podContainerChangesSpec struct { - StartInfraContainer bool - InfraChanged bool - InfraContainerId kubecontainer.DockerID - InitFailed bool - InitContainersToKeep map[kubecontainer.DockerID]int - ContainersToStart map[int]string - ContainersToKeep map[kubecontainer.DockerID]int -} - -func (dm *DockerManager) computePodContainerChanges(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (podContainerChangesSpec, error) { - start := time.Now() - defer func() { - metrics.ContainerManagerLatency.WithLabelValues("computePodContainerChanges").Observe(metrics.SinceInMicroseconds(start)) - }() - glog.V(5).Infof("Syncing Pod %q: %#v", format.Pod(pod), pod) - - containersToStart := make(map[int]string) - containersToKeep := make(map[kubecontainer.DockerID]int) - - var err error - var podInfraContainerID kubecontainer.DockerID - var changed bool - podInfraContainerStatus := podStatus.FindContainerStatusByName(PodInfraContainerName) - if podInfraContainerStatus != nil && podInfraContainerStatus.State == kubecontainer.ContainerStateRunning { - glog.V(4).Infof("Found pod infra container for %q", format.Pod(pod)) - changed, err = dm.podInfraContainerChanged(pod, podInfraContainerStatus) - if err != nil { - return podContainerChangesSpec{}, err - } - } - - createPodInfraContainer := true - if podInfraContainerStatus == nil || podInfraContainerStatus.State != kubecontainer.ContainerStateRunning { - glog.V(2).Infof("Need to restart pod infra container for %q because it is not found", format.Pod(pod)) - } else if changed { - glog.V(2).Infof("Need to restart pod infra container for %q because it is changed", format.Pod(pod)) - } else { - glog.V(4).Infof("Pod infra container looks good, keep it %q", format.Pod(pod)) - createPodInfraContainer = false - podInfraContainerID = kubecontainer.DockerID(podInfraContainerStatus.ID.ID) - 
containersToKeep[podInfraContainerID] = -1 - } - - // check the status of the init containers - initFailed := false - initContainersToKeep := make(map[kubecontainer.DockerID]int) - // always reset the init containers if the pod is reset - if !createPodInfraContainer { - // keep all successfully completed containers up to and including the first failing container - Containers: - for i, container := range pod.Spec.InitContainers { - containerStatus := podStatus.FindContainerStatusByName(container.Name) - switch { - case containerStatus == nil: - continue - case containerStatus.State == kubecontainer.ContainerStateRunning: - initContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] = i - case containerStatus.State == kubecontainer.ContainerStateExited: - initContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] = i - // TODO: should we abstract the "did the init container fail" check? - if containerStatus.ExitCode != 0 { - initFailed = true - break Containers - } - } - } - } - - // check the status of the containers - for index, container := range pod.Spec.Containers { - - containerStatus := podStatus.FindContainerStatusByName(container.Name) - if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning { - if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) { - // If we are here it means that the container is dead and should be restarted, or never existed and should - // be created. We may be inserting this ID again if the container has changed and it has - // RestartPolicy::Always, but it's not a big deal. - message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container) - glog.V(3).Info(message) - containersToStart[index] = message - } - continue - } - - containerID := kubecontainer.DockerID(containerStatus.ID.ID) - glog.V(3).Infof("pod %q container %q exists as %v", format.Pod(pod), container.Name, containerID) - - if createPodInfraContainer { - // createPodInfraContainer == true and Container exists - // If we're creating infra container everything will be killed anyway - // If RestartPolicy is Always or OnFailure we restart containers that were running before we - // killed them when restarting Infra Container. - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("Infra Container is being recreated. %q will be restarted.", container.Name) - glog.V(1).Info(message) - containersToStart[index] = message - } - continue - } - - if initFailed { - // initialization failed and Container exists - // If we have an initialization failure everything will be killed anyway - // If RestartPolicy is Always or OnFailure we restart containers that were running before we - // killed them when re-running initialization - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name) - glog.V(1).Info(message) - containersToStart[index] = message - } - continue - } - - // At this point, the container is running and pod infra container is good. - // We will look for changes and check healthiness for the container. 
- expectedHash := kubecontainer.HashContainerLegacy(&container) - hash := containerStatus.Hash - containerChanged := hash != 0 && hash != expectedHash - if containerChanged { - message := fmt.Sprintf("pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", format.Pod(pod), container.Name, hash, expectedHash) - glog.Info(message) - containersToStart[index] = message - continue - } - - liveness, found := dm.livenessManager.Get(containerStatus.ID) - if !found || liveness == proberesults.Success { - containersToKeep[containerID] = index - continue - } - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name) - glog.Info(message) - containersToStart[index] = message - } - } - - // After the loop one of the following should be true: - // - createPodInfraContainer is true and containersToKeep is empty. - // (In fact, when createPodInfraContainer is false, containersToKeep will not be touched). - // - createPodInfraContainer is false and containersToKeep contains at least ID of Infra Container - - // If Infra container is the last running one, we don't want to keep it, and we don't want to - // keep any init containers. - if !createPodInfraContainer && len(containersToStart) == 0 && len(containersToKeep) == 1 { - containersToKeep = make(map[kubecontainer.DockerID]int) - initContainersToKeep = make(map[kubecontainer.DockerID]int) - } - - return podContainerChangesSpec{ - StartInfraContainer: createPodInfraContainer, - InfraChanged: changed, - InfraContainerId: podInfraContainerID, - InitFailed: initFailed, - InitContainersToKeep: initContainersToKeep, - ContainersToStart: containersToStart, - ContainersToKeep: containersToKeep, - }, nil -} - -// Sync the running pod to match the specified desired pod. 
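The hash comparison above is what turns a spec edit into a restart: the manager stores a hash of each container's spec and re-creates the container whenever the stored value no longer matches the recomputed one. A minimal sketch of that idea, assuming a plain FNV-1a hash over the printed spec; the real HashContainerLegacy helper hashes the full v1.Container and lives in pkg/kubelet/container.

package main

import (
	"fmt"
	"hash/fnv"
)

// containerSpec stands in for the fields of v1.Container that matter here.
type containerSpec struct {
	Name  string
	Image string
	Args  []string
}

// hashSpec returns a stable hash of the spec; any edit to the spec changes
// the hash, which is what drives the "hash changed, kill and re-create" path.
func hashSpec(c *containerSpec) uint64 {
	h := fnv.New64a()
	fmt.Fprintf(h, "%#v", *c)
	return h.Sum64()
}

func main() {
	running := &containerSpec{Name: "bar", Image: "nginx:1.9"}
	desired := &containerSpec{Name: "bar", Image: "nginx:1.10"}

	oldHash, newHash := hashSpec(running), hashSpec(desired)
	if oldHash != 0 && oldHash != newHash {
		fmt.Printf("container %q hash changed (%d vs %d), restarting\n", desired.Name, oldHash, newHash)
	}
}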
-func (dm *DockerManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) { - start := time.Now() - defer func() { - metrics.ContainerManagerLatency.WithLabelValues("SyncPod").Observe(metrics.SinceInMicroseconds(start)) - }() - - containerChanges, err := dm.computePodContainerChanges(pod, podStatus) - if err != nil { - result.Fail(err) - return - } - glog.V(3).Infof("Got container changes for pod %q: %+v", format.Pod(pod), containerChanges) - - if containerChanges.InfraChanged { - dm.recorder.Eventf(pod, v1.EventTypeNormal, "InfraChanged", "Pod infrastructure changed, it will be killed and re-created.") - } - if containerChanges.StartInfraContainer || (len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0) { - if len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0 { - glog.V(4).Infof("Killing Infra Container for %q because all other containers are dead.", format.Pod(pod)) - } else { - glog.V(4).Infof("Killing Infra Container for %q, will start new one", format.Pod(pod)) - } - - // Get list of running container(s) to kill - podToKill := kubecontainer.ConvertPodStatusToRunningPod(dm.Type(), podStatus) - - // If there are dead network containers, also kill them to ensure - // their network resources get released and are available to be - // re-used by new net containers - for _, containerStatus := range podStatus.ContainerStatuses { - if containerIsNetworked(containerStatus.Name) && containerStatus.State == kubecontainer.ContainerStateExited { - container := &kubecontainer.Container{ - ID: containerStatus.ID, - Name: containerStatus.Name, - Image: containerStatus.Image, - ImageID: containerStatus.ImageID, - Hash: containerStatus.Hash, - State: containerStatus.State, - } - podToKill.Containers = append(podToKill.Containers, container) - } - } - - // Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container) - // TODO(random-liu): We'll use pod status directly in the future - killResult := dm.killPodWithSyncResult(pod, podToKill, nil) - result.AddPodSyncResult(killResult) - if killResult.Error() != nil { - return - } - } else { - // Otherwise kill any running containers in this pod which are not specified as ones to keep. 
- runningContainerStatues := podStatus.GetRunningContainerStatuses() - for _, containerStatus := range runningContainerStatues { - _, keep := containerChanges.ContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] - _, keepInit := containerChanges.InitContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] - if !keep && !keepInit { - glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerStatus.Name, containerStatus.ID, format.Pod(pod)) - // attempt to find the appropriate container policy - var podContainer *v1.Container - var killMessage string - for i, c := range pod.Spec.Containers { - if c.Name == containerStatus.Name { - podContainer = &pod.Spec.Containers[i] - killMessage = containerChanges.ContainersToStart[i] - break - } - } - killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerStatus.Name) - result.AddSyncResult(killContainerResult) - if err := dm.KillContainerInPod(containerStatus.ID, podContainer, pod, killMessage, nil); err != nil { - killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error()) - glog.Errorf("Error killing container %q(id=%q) for pod %q: %v", containerStatus.Name, containerStatus.ID, format.Pod(pod), err) - return - } - } - } - } - - // Keep terminated init containers fairly aggressively controlled - dm.pruneInitContainersBeforeStart(pod, podStatus, containerChanges.InitContainersToKeep) - - // We pass the value of the podIP down to runContainerInPod, which in turn - // passes it to various other functions, in order to facilitate - // functionality that requires this value (hosts file and downward API) - // and avoid races determining the pod IP in cases where a container - // requires restart but the podIP isn't in the status manager yet. - // - // We default to the IP in the passed-in pod status, and overwrite it if the - // infra container needs to be (re)started. - podIP := "" - if podStatus != nil { - podIP = podStatus.IP - } - - // If we should create infra container then we do it first. 
- podInfraContainerID := containerChanges.InfraContainerId - if containerChanges.StartInfraContainer && (len(containerChanges.ContainersToStart) > 0) { - glog.V(4).Infof("Creating pod infra container for %q", format.Pod(pod)) - startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, PodInfraContainerName) - result.AddSyncResult(startContainerResult) - var msg string - podInfraContainerID, err, msg = dm.createPodInfraContainer(pod) - if err != nil { - startContainerResult.Fail(err, msg) - glog.Errorf("Failed to create pod infra container: %v; Skipping pod %q: %s", err, format.Pod(pod), msg) - return - } - - setupNetworkResult := kubecontainer.NewSyncResult(kubecontainer.SetupNetwork, kubecontainer.GetPodFullName(pod)) - result.AddSyncResult(setupNetworkResult) - if !kubecontainer.IsHostNetworkPod(pod) { - if err := dm.network.SetUpPod(pod.Namespace, pod.Name, podInfraContainerID.ContainerID(), pod.Annotations); err != nil { - setupNetworkResult.Fail(kubecontainer.ErrSetupNetwork, err.Error()) - glog.Error(err) - - // Delete infra container - killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, PodInfraContainerName) - result.AddSyncResult(killContainerResult) - if delErr := dm.KillContainerInPod(podInfraContainerID.ContainerID(), nil, pod, err.Error(), nil); delErr != nil { - killContainerResult.Fail(kubecontainer.ErrKillContainer, delErr.Error()) - glog.Warningf("Clear infra container failed for pod %q: %v", format.Pod(pod), delErr) - } - return - } - - // Setup the host interface unless the pod is on the host's network (FIXME: move to networkPlugin when ready) - podInfraContainer, err := dm.client.InspectContainer(string(podInfraContainerID)) - if err != nil { - glog.Errorf("Failed to inspect pod infra container: %v; Skipping pod %q", err, format.Pod(pod)) - result.Fail(err) - return - } - - if dm.configureHairpinMode { - if err = hairpin.SetUpContainerPid(podInfraContainer.State.Pid, knetwork.DefaultInterfaceName); err != nil { - glog.Warningf("Hairpin setup failed for pod %q: %v", format.Pod(pod), err) - } - } - - // Overwrite the podIP passed in the pod status, since we just started the infra container. - podIP, err = dm.determineContainerIP(pod.Namespace, pod.Name, podInfraContainer) - if err != nil { - glog.Errorf("Network error: %v; Skipping pod %q", err, format.Pod(pod)) - result.Fail(err) - return - } - glog.Infof("Determined pod ip after infra change: %q: %q", format.Pod(pod), podIP) - } - } - - next, status, done := findActiveInitContainer(pod, podStatus) - if status != nil { - if status.ExitCode != 0 { - // container initialization has failed, flag the pod as failed - initContainerResult := kubecontainer.NewSyncResult(kubecontainer.InitContainer, status.Name) - initContainerResult.Fail(kubecontainer.ErrRunInitContainer, fmt.Sprintf("init container %q exited with %d", status.Name, status.ExitCode)) - result.AddSyncResult(initContainerResult) - if pod.Spec.RestartPolicy == v1.RestartPolicyNever { - utilruntime.HandleError(fmt.Errorf("error running pod %q init container %q, restart=Never: %#v", format.Pod(pod), status.Name, status)) - return - } - utilruntime.HandleError(fmt.Errorf("Error running pod %q init container %q, restarting: %#v", format.Pod(pod), status.Name, status)) - } - } - - // Note: when configuring the pod's containers anything that can be configured by pointing - // to the namespace of the infra container should use namespaceMode. This includes things like the net namespace - // and IPC namespace. 
PID mode cannot point to another container right now. - // See createPodInfraContainer for infra container setup. - namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID) - pidMode := getPidMode(pod) - - if next != nil { - if len(containerChanges.ContainersToStart) == 0 { - glog.V(4).Infof("No containers to start, stopping at init container %+v in pod %v", next.Name, format.Pod(pod)) - return - } - - // If we need to start the next container, do so now then exit - container := next - startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) - result.AddSyncResult(startContainerResult) - - // containerChanges.StartInfraContainer causes the containers to be restarted for config reasons - if !containerChanges.StartInfraContainer { - isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff) - if isInBackOff { - startContainerResult.Fail(err, msg) - glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod)) - return - } - } - - glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod)) - if err, msg := dm.tryContainerStart(container, pod, podStatus, pullSecrets, namespaceMode, pidMode, podIP); err != nil { - startContainerResult.Fail(err, msg) - glog.V(3).Infof("container start failed: %v: %s", err, msg) - return - } - - // Successfully started the container; clear the entry in the failure - glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod)) - return - } - if !done { - // init container still running - glog.V(4).Infof("An init container is still running in pod %v", format.Pod(pod)) - return - } - if containerChanges.InitFailed { - // init container still running - glog.V(4).Infof("Not all init containers have succeeded for pod %v", format.Pod(pod)) - return - } - - // Start regular containers - for idx := range containerChanges.ContainersToStart { - container := &pod.Spec.Containers[idx] - startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) - result.AddSyncResult(startContainerResult) - - // containerChanges.StartInfraContainer causes the containers to be restarted for config reasons - if !containerChanges.StartInfraContainer { - isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff) - if isInBackOff { - startContainerResult.Fail(err, msg) - glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod)) - continue - } - } - - glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod)) - if err, msg := dm.tryContainerStart(container, pod, podStatus, pullSecrets, namespaceMode, pidMode, podIP); err != nil { - startContainerResult.Fail(err, msg) - glog.V(3).Infof("container start failed: %v: %s", err, msg) - continue - } - } - return -} - -// tryContainerStart attempts to pull and start the container, returning an error and a reason string if the start -// was not successful. 
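SyncPod joins every user container to the infra container's namespaces by passing a mode string of the form container:<infra-id> down to runContainerInPod. A rough sketch of how such a mode maps onto the engine-api HostConfig this package vendors; the infra container ID is a placeholder and the helper name is made up for illustration.

package main

import (
	"fmt"

	dockercontainer "github.com/docker/engine-api/types/container"
)

// hostConfigForPodContainer joins a regular container to the infra container's
// network and IPC namespaces. PID mode is either empty or "host", mirroring
// the getPidMode helper shown later in this file.
func hostConfigForPodContainer(infraID string, hostPID bool) *dockercontainer.HostConfig {
	nsMode := fmt.Sprintf("container:%s", infraID)
	hc := &dockercontainer.HostConfig{
		NetworkMode: dockercontainer.NetworkMode(nsMode),
		IpcMode:     dockercontainer.IpcMode(nsMode),
	}
	if hostPID {
		hc.PidMode = "host"
	}
	return hc
}

func main() {
	// "f1d2d2f924e9" is a placeholder infra container ID.
	hc := hostConfigForPodContainer("f1d2d2f924e9", false)
	fmt.Println(hc.NetworkMode, hc.IpcMode)
}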
-func (dm *DockerManager) tryContainerStart(container *v1.Container, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, namespaceMode, pidMode, podIP string) (err error, reason string) { - imageRef, msg, err := dm.imagePuller.EnsureImageExists(pod, container, pullSecrets) - if err != nil { - return err, msg - } - - if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot != nil && *container.SecurityContext.RunAsNonRoot { - err := dm.verifyNonRoot(container) - if err != nil { - return kubecontainer.ErrVerifyNonRoot, err.Error() - } - } - - // For a new container, the RestartCount should be 0 - restartCount := 0 - containerStatus := podStatus.FindContainerStatusByName(container.Name) - if containerStatus != nil { - restartCount = containerStatus.RestartCount + 1 - } - - // Allow override of networking mode for specific platforms (e.g. Windows) - netMode := getNetworkingMode() - if netMode == "" { - // If not overriden, use the namespace mode - netMode = namespaceMode - } - - _, err = dm.runContainerInPod(pod, container, netMode, namespaceMode, pidMode, podIP, imageRef, restartCount) - if err != nil { - // TODO(bburns) : Perhaps blacklist a container after N failures? - return kubecontainer.ErrRunContainer, err.Error() - } - return nil, "" -} - -// pruneInitContainers ensures that before we begin creating init containers, we have reduced the number -// of outstanding init containers still present. This reduces load on the container garbage collector -// by only preserving the most recent terminated init container. -func (dm *DockerManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.DockerID]int) { - // only the last execution of each init container should be preserved, and only preserve it if it is in the - // list of init containers to keep. - initContainerNames := sets.NewString() - for _, container := range pod.Spec.InitContainers { - initContainerNames.Insert(container.Name) - } - for name := range initContainerNames { - count := 0 - for _, status := range podStatus.ContainerStatuses { - if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited { - continue - } - count++ - // keep the first init container for this name - if count == 1 { - continue - } - // if there is a reason to preserve the older container, do so - if _, ok := initContainersToKeep[kubecontainer.DockerID(status.ID.ID)]; ok { - continue - } - - // prune all other init containers that match this container name - // TODO: we may not need aggressive pruning - glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count) - if err := dm.client.RemoveContainer(status.ID.ID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { - if IsContainerNotFoundError(err) { - count-- - continue - } - utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod))) - // TODO: report serious errors - continue - } - - // remove any references to this container - if _, ok := dm.containerRefManager.GetRef(status.ID); ok { - dm.containerRefManager.ClearRef(status.ID) - } else { - glog.Warningf("No ref for pod '%q'", pod.Name) - } - } - } -} - -// findActiveInitContainer returns the status of the last failed container, the next init container to -// start, or done if there are no further init containers. 
Status is only returned if an init container -// failed, in which case next will point to the current container. -func findActiveInitContainer(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (next *v1.Container, status *kubecontainer.ContainerStatus, done bool) { - if len(pod.Spec.InitContainers) == 0 { - return nil, nil, true - } - - for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { - container := &pod.Spec.InitContainers[i] - status := podStatus.FindContainerStatusByName(container.Name) - switch { - case status == nil: - continue - case status.State == kubecontainer.ContainerStateRunning: - return nil, nil, false - case status.State == kubecontainer.ContainerStateExited: - switch { - // the container has failed, we'll have to retry - case status.ExitCode != 0: - return &pod.Spec.InitContainers[i], status, false - // all init containers successful - case i == (len(pod.Spec.InitContainers) - 1): - return nil, nil, true - // all containers up to i successful, go to i+1 - default: - return &pod.Spec.InitContainers[i+1], nil, false - } - } - } - - return &pod.Spec.InitContainers[0], nil, false -} - -// verifyNonRoot returns an error if the container or image will run as the root user. -func (dm *DockerManager) verifyNonRoot(container *v1.Container) error { - if securitycontext.HasRunAsUser(container) { - if securitycontext.HasRootRunAsUser(container) { - return fmt.Errorf("container's runAsUser breaks non-root policy") - } - return nil - } - - imgRoot, err := dm.isImageRoot(container.Image) - if err != nil { - return fmt.Errorf("can't tell if image runs as root: %v", err) - } - if imgRoot { - return fmt.Errorf("container has no runAsUser and image will run as root") - } - - return nil -} - -// isImageRoot returns true if the user directive is not set on the image, the user is set to 0 -// or the user is set to root. If there is an error inspecting the image this method will return -// false and return the error. -func (dm *DockerManager) isImageRoot(image string) (bool, error) { - img, err := dm.client.InspectImageByRef(image) - if err != nil { - return false, err - } - if img == nil || img.Config == nil { - return false, fmt.Errorf("unable to inspect image %s, nil Config", image) - } - - user := GetUserFromImageUser(img.Config.User) - // if no user is defined container will run as root - if user == "" { - return true, nil - } - // do not allow non-numeric user directives - uid, err := strconv.Atoi(user) - if err != nil { - return false, fmt.Errorf("non-numeric user (%s) is not allowed", user) - } - // user is numeric, check for 0 - return uid == 0, nil -} - -// GetUserFromImageUser splits the user out of an user:group string. -func GetUserFromImageUser(id string) string { - if id == "" { - return id - } - // split instances where the id may contain user:group - if strings.Contains(id, ":") { - return strings.Split(id, ":")[0] - } - // no group, just return the id - return id -} - -// If all instances of a container are garbage collected, doBackOff will also return false, which means the container may be restarted before the -// backoff deadline. However, because that won't cause error and the chance is really slim, we can just ignore it for now. -// If a container is still in backoff, the function will return a brief backoff error and a detailed error message. 
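verifyNonRoot, isImageRoot and GetUserFromImageUser above implement the RunAsNonRoot image check: take the image's USER directive, strip an optional :group suffix, and treat an empty or zero UID as root while rejecting non-numeric users outright. A self-contained sketch of that parsing rule; the sample USER values are illustrative.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// userFromImageUser splits the user out of a "user:group" directive.
func userFromImageUser(id string) string {
	if strings.Contains(id, ":") {
		return strings.Split(id, ":")[0]
	}
	return id
}

// runsAsRoot reports whether an image USER directive would run as root under
// the policy above: empty means root, non-numeric users are rejected.
func runsAsRoot(imageUser string) (bool, error) {
	user := userFromImageUser(imageUser)
	if user == "" {
		return true, nil
	}
	uid, err := strconv.Atoi(user)
	if err != nil {
		return false, fmt.Errorf("non-numeric user (%s) is not allowed", user)
	}
	return uid == 0, nil
}

func main() {
	for _, u := range []string{"", "0", "1000:1000", "www-data"} {
		root, err := runsAsRoot(u)
		fmt.Printf("USER %q -> root=%v err=%v\n", u, root, err)
	}
}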
-func (dm *DockerManager) doBackOff(pod *v1.Pod, container *v1.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, error, string) { - var cStatus *kubecontainer.ContainerStatus - // Use the finished time of the latest exited container as the start point to calculate whether to do back-off. - // TODO(random-liu): Better define backoff start point; add unit and e2e test after we finalize this. (See github issue #22240) - for _, c := range podStatus.ContainerStatuses { - if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited { - cStatus = c - break - } - } - if cStatus != nil { - glog.Infof("checking backoff for container %q in pod %q", container.Name, pod.Name) - ts := cStatus.FinishedAt - // found a container that requires backoff - dockerName := KubeletContainerName{ - PodFullName: kubecontainer.GetPodFullName(pod), - PodUID: pod.UID, - ContainerName: container.Name, - } - stableName, _, _ := BuildDockerName(dockerName, container) - if backOff.IsInBackOffSince(stableName, ts) { - if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil { - dm.recorder.Eventf(ref, v1.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed docker container") - } - err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, format.Pod(pod)) - glog.Infof("%s", err.Error()) - return true, kubecontainer.ErrCrashLoopBackOff, err.Error() - } - backOff.Next(stableName, ts) - } - return false, nil, "" -} - -// getPidMode returns the pid mode to use on the docker container based on pod.Spec.HostPID. -func getPidMode(pod *v1.Pod) string { - pidMode := "" - if pod.Spec.HostPID { - pidMode = namespaceModeHost - } - return pidMode -} - -// getIPCMode returns the ipc mode to use on the docker container based on pod.Spec.HostIPC. -func getIPCMode(pod *v1.Pod) string { - ipcMode := "" - if pod.Spec.HostIPC { - ipcMode = namespaceModeHost - } - return ipcMode -} - -func (dm *DockerManager) DeleteContainer(containerID kubecontainer.ContainerID) error { - return dm.containerGC.deleteContainer(containerID.ID) -} - -// GetNetNS returns the network namespace path for the given container -func (dm *DockerManager) GetNetNS(containerID kubecontainer.ContainerID) (string, error) { - inspectResult, err := dm.client.InspectContainer(containerID.ID) - if err != nil { - glog.Errorf("Error inspecting container: '%v'", err) - return "", err - } - if inspectResult.State.Pid == 0 { - // Docker reports pid 0 for an exited container. We can't use it to - // check the network namespace, so return an empty string instead. 
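doBackOff above keys the crash-loop check on the container's stable Docker name and the finish time of its most recent exited instance. A rough sketch of the same pattern with client-go's flowcontrol.Backoff; the key and durations below are placeholders, not the kubelet's actual settings.

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// Back off between 10s and 5m, standing in for the kubelet's configuration.
	backOff := flowcontrol.NewBackOff(10*time.Second, 5*time.Minute)

	key := "k8s_bar_foo_new_12345678" // hypothetical stable container name
	finishedAt := time.Now().Add(-2 * time.Second)

	if backOff.IsInBackOffSince(key, finishedAt) {
		fmt.Printf("Back-off %s restarting failed container %s\n", backOff.Get(key), key)
		return
	}
	// Not in back-off: record this restart so the delay grows next time.
	backOff.Next(key, finishedAt)
	fmt.Println("restarting container", key)
}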
- glog.V(4).Infof("Cannot find network namespace for the terminated container %q", containerID.ID) - return "", nil - } - - netnsPath := fmt.Sprintf(DockerNetnsFmt, inspectResult.State.Pid) - return netnsPath, nil -} - -func (dm *DockerManager) GetPodContainerID(pod *kubecontainer.Pod) (kubecontainer.ContainerID, error) { - for _, c := range pod.Containers { - if c.Name == PodInfraContainerName { - return c.ID, nil - } - } - - return kubecontainer.ContainerID{}, fmt.Errorf("Pod %s unknown to docker.", kubecontainer.BuildPodFullName(pod.Name, pod.Namespace)) -} - -// Garbage collection of dead containers -func (dm *DockerManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error { - return dm.containerGC.GarbageCollect(gcPolicy, allSourcesReady) -} - -func (dm *DockerManager) GetPodStatus(uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) { - podStatus := &kubecontainer.PodStatus{ID: uid, Name: name, Namespace: namespace} - // Now we retain restart count of container as a docker label. Each time a container - // restarts, pod will read the restart count from the registered dead container, increment - // it to get the new restart count, and then add a label with the new restart count on - // the newly started container. - // However, there are some limitations of this method: - // 1. When all dead containers were garbage collected, the container status could - // not get the historical value and would be *inaccurate*. Fortunately, the chance - // is really slim. - // 2. When working with old version containers which have no restart count label, - // we can only assume their restart count is 0. - // Anyhow, we only promised "best-effort" restart count reporting, we can just ignore - // these limitations now. - var containerStatuses []*kubecontainer.ContainerStatus - // We have added labels like pod name and pod namespace, it seems that we can do filtered list here. - // However, there may be some old containers without these labels, so at least now we can't do that. - // TODO(random-liu): Do only one list and pass in the list result in the future - // TODO(random-liu): Add filter when we are sure that all the containers have the labels - containers, err := dm.client.ListContainers(dockertypes.ContainerListOptions{All: true}) - if err != nil { - return podStatus, err - } - // Loop through list of running and exited docker containers to construct - // the statuses. We assume docker returns a list of containers sorted in - // reverse by time. - // TODO: optimization: set maximum number of containers per container name to examine. - for _, c := range containers { - if len(c.Names) == 0 { - continue - } - dockerName, _, err := ParseDockerName(c.Names[0]) - if err != nil { - continue - } - if dockerName.PodUID != uid { - continue - } - result, ip, err := dm.inspectContainer(c.ID, name, namespace) - if err != nil { - if IsContainerNotFoundError(err) { - // https://github.com/kubernetes/kubernetes/issues/22541 - // Sometimes when docker's state is corrupt, a container can be listed - // but couldn't be inspected. We fake a status for this container so - // that we can still return a status for the pod to sync. 
- result = &kubecontainer.ContainerStatus{ - ID: kubecontainer.DockerID(c.ID).ContainerID(), - Name: dockerName.ContainerName, - State: kubecontainer.ContainerStateUnknown, - } - glog.Errorf("Unable to inspect container %q: %v", c.ID, err) - } else { - return podStatus, err - } - } - containerStatuses = append(containerStatuses, result) - if containerProvidesPodIP(dockerName.ContainerName) && ip != "" { - podStatus.IP = ip - } - } - - podStatus.ContainerStatuses = containerStatuses - return podStatus, nil -} - -// getVersionInfo returns apiVersion & daemonVersion of docker runtime -func (dm *DockerManager) getVersionInfo() (versionInfo, error) { - apiVersion, err := dm.APIVersion() - if err != nil { - return versionInfo{}, err - } - daemonVersion, err := dm.Version() - if err != nil { - return versionInfo{}, err - } - return versionInfo{ - apiVersion: apiVersion, - daemonVersion: daemonVersion, - }, nil -} - -// Truncate the message if it exceeds max length. -func truncateMsg(msg string, max int) string { - if len(msg) <= max { - return msg - } - glog.V(2).Infof("Truncated %s", msg) - const truncatedMsg = "..TRUNCATED.." - begin := (max - len(truncatedMsg)) / 2 - end := len(msg) - (max - (len(truncatedMsg) + begin)) - return msg[:begin] + truncatedMsg + msg[end:] -} diff --git a/pkg/kubelet/dockertools/docker_manager_linux.go b/pkg/kubelet/dockertools/docker_manager_linux.go index 539ae0a2ca4..5ef8f461971 100644 --- a/pkg/kubelet/dockertools/docker_manager_linux.go +++ b/pkg/kubelet/dockertools/docker_manager_linux.go @@ -18,63 +18,6 @@ limitations under the License. package dockertools -import ( - dockertypes "github.com/docker/engine-api/types" - dockercontainer "github.com/docker/engine-api/types/container" - - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" -) - -// These two functions are OS specific (for now at least) -func updateHostConfig(hc *dockercontainer.HostConfig, opts *kubecontainer.RunContainerOptions) { - // no-op, there is a windows implementation that is different. -} - func DefaultMemorySwap() int64 { return 0 } - -func getContainerIP(container *dockertypes.ContainerJSON) string { - result := "" - if container.NetworkSettings != nil { - result = container.NetworkSettings.IPAddress - - // Fall back to IPv6 address if no IPv4 address is present - if result == "" { - result = container.NetworkSettings.GlobalIPv6Address - } - } - return result -} - -// We don't want to override the networking mode on Linux. -func getNetworkingMode() string { return "" } - -// Returns true if the container name matches the infrastructure's container name -func containerProvidesPodIP(containerName string) bool { - return containerName == PodInfraContainerName -} - -// Only the infrastructure container needs network setup/teardown -func containerIsNetworked(containerName string) bool { - return containerName == PodInfraContainerName -} - -// Returns Seccomp and AppArmor Security options -func (dm *DockerManager) getSecurityOpts(pod *v1.Pod, ctrName string) ([]dockerOpt, error) { - var securityOpts []dockerOpt - if seccompOpts, err := dm.getSeccompOpts(pod, ctrName); err != nil { - return nil, err - } else { - securityOpts = append(securityOpts, seccompOpts...) - } - - if appArmorOpts, err := dm.getAppArmorOpts(pod, ctrName); err != nil { - return nil, err - } else { - securityOpts = append(securityOpts, appArmorOpts...) 
- } - - return securityOpts, nil -} diff --git a/pkg/kubelet/dockertools/docker_manager_linux_test.go b/pkg/kubelet/dockertools/docker_manager_linux_test.go deleted file mode 100644 index a1a867f5cb3..00000000000 --- a/pkg/kubelet/dockertools/docker_manager_linux_test.go +++ /dev/null @@ -1,466 +0,0 @@ -// +build linux - -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "fmt" - "net" - "path" - "strconv" - "testing" - - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - "k8s.io/kubernetes/pkg/kubelet/events" - "k8s.io/kubernetes/pkg/kubelet/network" - nettest "k8s.io/kubernetes/pkg/kubelet/network/testing" - "k8s.io/kubernetes/pkg/security/apparmor" - utilstrings "k8s.io/kubernetes/pkg/util/strings" -) - -func TestGetSecurityOpts(t *testing.T) { - const containerName = "bar" - pod := func(annotations map[string]string) *v1.Pod { - p := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: containerName}, - }, - }) - p.Annotations = annotations - return p - } - - tests := []struct { - msg string - pod *v1.Pod - expectedOpts []string - }{{ - msg: "No security annotations", - pod: pod(nil), - expectedOpts: []string{"seccomp=unconfined"}, - }, { - msg: "Seccomp default", - pod: pod(map[string]string{ - v1.SeccompContainerAnnotationKeyPrefix + containerName: "docker/default", - }), - expectedOpts: nil, - }, { - msg: "AppArmor runtime/default", - pod: pod(map[string]string{ - apparmor.ContainerAnnotationKeyPrefix + containerName: apparmor.ProfileRuntimeDefault, - }), - expectedOpts: []string{"seccomp=unconfined"}, - }, { - msg: "AppArmor local profile", - pod: pod(map[string]string{ - apparmor.ContainerAnnotationKeyPrefix + containerName: apparmor.ProfileNamePrefix + "foo", - }), - expectedOpts: []string{"seccomp=unconfined", "apparmor=foo"}, - }, { - msg: "AppArmor and seccomp profile", - pod: pod(map[string]string{ - v1.SeccompContainerAnnotationKeyPrefix + containerName: "docker/default", - apparmor.ContainerAnnotationKeyPrefix + containerName: apparmor.ProfileNamePrefix + "foo", - }), - expectedOpts: []string{"apparmor=foo"}, - }} - - dm, _ := newTestDockerManagerWithVersion("1.11.1", "1.23") - for i, test := range tests { - securityOpts, err := dm.getSecurityOpts(test.pod, containerName) - assert.NoError(t, err, "TestCase[%d]: %s", i, test.msg) - opts := FmtDockerOpts(securityOpts, '=') - assert.Len(t, opts, len(test.expectedOpts), "TestCase[%d]: %s", i, test.msg) - for _, opt := range test.expectedOpts { - assert.Contains(t, opts, opt, "TestCase[%d]: %s", i, test.msg) - } - } -} - -func TestSeccompIsUnconfinedByDefaultWithDockerV110(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithVersion("1.10.1", "1.22") - // We want to capture events. 
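TestGetSecurityOpts above pins down how the removed manager turned seccomp and AppArmor annotations into Docker --security-opt strings, joining key and value with '=' against the 1.23 API, while the older-daemon tests below show ':' as the separator. A small sketch of that formatting step; the dockerOpt shape is assumed from the calls in the test.

package main

import "fmt"

// dockerOpt mirrors the key/value pairs the manager collected from seccomp
// and AppArmor annotations before handing them to Docker.
type dockerOpt struct {
	key, value string
}

// fmtDockerOpts renders the options as security-opt strings, using whichever
// separator the daemon's API version expects ('=' here, ':' for older daemons).
func fmtDockerOpts(opts []dockerOpt, sep rune) []string {
	out := make([]string, 0, len(opts))
	for _, o := range opts {
		out = append(out, fmt.Sprintf("%s%c%s", o.key, sep, o.value))
	}
	return out
}

func main() {
	opts := []dockerOpt{
		{key: "seccomp", value: "unconfined"},
		{key: "apparmor", value: "foo"}, // profile name after stripping the localhost/ prefix
	}
	fmt.Println(fmtDockerOpts(opts, '=')) // [seccomp=unconfined apparmor=foo]
}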
- recorder := record.NewFakeRecorder(20) - dm.recorder = recorder - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - assert.Contains(t, newContainer.HostConfig.SecurityOpt, "seccomp:unconfined", "Pods with Docker versions >= 1.10 must not have seccomp disabled by default") - - cid := utilstrings.ShortenString(fakeDocker.Created[1], 12) - assert.NoError(t, expectEvent(recorder, v1.EventTypeNormal, events.CreatedContainer, - fmt.Sprintf("Created container with docker id %s; Security:[seccomp=unconfined]", cid))) -} - -func TestUnconfinedSeccompProfileWithDockerV110(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithVersion("1.10.1", "1.22") - pod := makePod("foo4", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar4"}, - }, - }) - pod.Annotations = map[string]string{ - v1.SeccompPodAnnotationKey: "unconfined", - } - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar4"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - assert.Contains(t, newContainer.HostConfig.SecurityOpt, "seccomp:unconfined", "Pods created with a secccomp annotation of unconfined should have seccomp:unconfined.") -} - -func TestDefaultSeccompProfileWithDockerV110(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithVersion("1.10.1", "1.22") - pod := makePod("foo1", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar1"}, - }, - }) - pod.Annotations = map[string]string{ - v1.SeccompPodAnnotationKey: "docker/default", - } - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar1"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - assert.NotContains(t, newContainer.HostConfig.SecurityOpt, "seccomp:unconfined", "Pods created with a secccomp annotation of docker/default should have empty security opt.") -} - -func TestSeccompContainerAnnotationTrumpsPod(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithVersion("1.10.1", "1.22") - pod := makePod("foo2", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar2"}, - }, - }) - pod.Annotations = map[string]string{ - v1.SeccompPodAnnotationKey: "unconfined", - v1.SeccompContainerAnnotationKeyPrefix + "bar2": "docker/default", - } - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. 
- "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar2"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - assert.NotContains(t, newContainer.HostConfig.SecurityOpt, "seccomp:unconfined", "Container annotation should trump the pod annotation for seccomp.") -} - -func TestSecurityOptsAreNilWithDockerV19(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithVersion("1.9.1", "1.21") - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - assert.NotContains(t, newContainer.HostConfig.SecurityOpt, "seccomp:unconfined", "Pods with Docker versions < 1.10 must not have seccomp disabled by default") -} - -func TestCreateAppArmorContanier(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithVersion("1.11.1", "1.23") - // We want to capture events. - recorder := record.NewFakeRecorder(20) - dm.recorder = recorder - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "12345678", - Name: "foo", - Namespace: "new", - Annotations: map[string]string{ - apparmor.ContainerAnnotationKeyPrefix + "test": apparmor.ProfileNamePrefix + "test-profile", - }, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - {Name: "test"}, - }, - }, - } - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "test"})) - - // Verify security opts. 
- newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - securityOpts := newContainer.HostConfig.SecurityOpt - assert.Contains(t, securityOpts, "apparmor=test-profile", "Container should have apparmor security opt") - - cid := utilstrings.ShortenString(fakeDocker.Created[1], 12) - assert.NoError(t, expectEvent(recorder, v1.EventTypeNormal, events.CreatedContainer, - fmt.Sprintf("Created container with docker id %s; Security:[seccomp=unconfined apparmor=test-profile]", cid))) -} - -func TestSeccompLocalhostProfileIsLoaded(t *testing.T) { - tests := []struct { - annotations map[string]string - expectedSecOpt string - expectedSecMsg string - expectedError string - }{ - { - annotations: map[string]string{ - v1.SeccompPodAnnotationKey: "localhost/test", - }, - expectedSecOpt: `seccomp={"foo":"bar"}`, - expectedSecMsg: "seccomp=test(md5:21aeae45053385adebd25311f9dd9cb1)", - }, - { - annotations: map[string]string{ - v1.SeccompPodAnnotationKey: "localhost/sub/subtest", - }, - expectedSecOpt: `seccomp={"abc":"def"}`, - expectedSecMsg: "seccomp=sub/subtest(md5:07c9bcb4db631f7ca191d6e0bca49f76)", - }, - { - annotations: map[string]string{ - v1.SeccompPodAnnotationKey: "localhost/not-existing", - }, - expectedError: "cannot load seccomp profile", - }, - } - - for i, test := range tests { - dm, fakeDocker := newTestDockerManagerWithVersion("1.11.0", "1.23") - // We want to capture events. - recorder := record.NewFakeRecorder(20) - dm.recorder = recorder - - dm.seccompProfileRoot = path.Join("fixtures", "seccomp") - - pod := makePod("foo2", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar2"}, - }, - }) - pod.Annotations = test.annotations - - result := runSyncPod(t, dm, fakeDocker, pod, nil, test.expectedError != "") - if test.expectedError != "" { - assert.Contains(t, result.Error().Error(), test.expectedError) - continue - } - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. 
- "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar2"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - assert.Contains(t, newContainer.HostConfig.SecurityOpt, test.expectedSecOpt, "The compacted seccomp json profile should be loaded.") - - cid := utilstrings.ShortenString(fakeDocker.Created[1], 12) - assert.NoError(t, expectEvent(recorder, v1.EventTypeNormal, events.CreatedContainer, - fmt.Sprintf("Created container with docker id %s; Security:[%s]", cid, test.expectedSecMsg)), - "testcase %d", i) - } -} - -func TestGetPodStatusFromNetworkPlugin(t *testing.T) { - cases := []struct { - pod *v1.Pod - fakePodIP string - containerID string - infraContainerID string - networkStatusError error - expectRunning bool - expectUnknown bool - }{ - { - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "12345678", - Name: "foo", - Namespace: "new", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{{Name: "container"}}, - }, - }, - fakePodIP: "10.10.10.10", - containerID: "123", - infraContainerID: "9876", - networkStatusError: nil, - expectRunning: true, - expectUnknown: false, - }, - { - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "12345678", - Name: "foo", - Namespace: "new", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{{Name: "container"}}, - }, - }, - fakePodIP: "", - containerID: "123", - infraContainerID: "9876", - networkStatusError: fmt.Errorf("CNI plugin error"), - expectRunning: false, - expectUnknown: true, - }, - } - for _, test := range cases { - dm, fakeDocker := newTestDockerManager() - ctrl := gomock.NewController(t) - defer ctrl.Finish() - fnp := nettest.NewMockNetworkPlugin(ctrl) - dm.network = network.NewPluginManager(fnp) - - fakeDocker.SetFakeRunningContainers([]*FakeContainer{ - { - ID: test.containerID, - Name: fmt.Sprintf("/k8s_container_%s_%s_%s_42", test.pod.Name, test.pod.Namespace, test.pod.UID), - Running: true, - }, - { - ID: test.infraContainerID, - Name: fmt.Sprintf("/k8s_POD.%s_%s_%s_%s_42", strconv.FormatUint(generatePodInfraContainerHash(test.pod), 16), test.pod.Name, test.pod.Namespace, test.pod.UID), - Running: true, - }, - }) - - fnp.EXPECT().Name().Return("someNetworkPlugin").AnyTimes() - var podNetworkStatus *network.PodNetworkStatus - if test.fakePodIP != "" { - podNetworkStatus = &network.PodNetworkStatus{IP: net.ParseIP(test.fakePodIP)} - } - fnp.EXPECT().GetPodNetworkStatus(test.pod.Namespace, test.pod.Name, kubecontainer.DockerID(test.infraContainerID).ContainerID()).Return(podNetworkStatus, test.networkStatusError) - - podStatus, err := dm.GetPodStatus(test.pod.UID, test.pod.Name, test.pod.Namespace) - if err != nil { - t.Fatal(err) - } - if podStatus.IP != test.fakePodIP { - t.Errorf("Got wrong ip, expected %v, got %v", test.fakePodIP, podStatus.IP) - } - - expectedStatesCount := 0 - var expectedState kubecontainer.ContainerState - if test.expectRunning { - expectedState = kubecontainer.ContainerStateRunning - } else if test.expectUnknown { - expectedState = kubecontainer.ContainerStateUnknown - } else { - t.Errorf("Some state has to be expected") - } - for _, containerStatus := range podStatus.ContainerStatuses { - if containerStatus.State == expectedState { - expectedStatesCount++ - } - } - if expectedStatesCount < 1 { - t.Errorf("Invalid count of containers with expected state") - } - } -} diff --git a/pkg/kubelet/dockertools/docker_manager_test.go 
b/pkg/kubelet/dockertools/docker_manager_test.go deleted file mode 100644 index d1b98a0b381..00000000000 --- a/pkg/kubelet/dockertools/docker_manager_test.go +++ /dev/null @@ -1,1908 +0,0 @@ -/* -Copyright 2014 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "flag" - "fmt" - "io/ioutil" - "net" - "net/http" - "os" - "reflect" - "regexp" - "sort" - "strconv" - "strings" - "testing" - "time" - - dockertypes "github.com/docker/engine-api/types" - dockercontainer "github.com/docker/engine-api/types/container" - dockerstrslice "github.com/docker/engine-api/types/strslice" - "github.com/golang/mock/gomock" - cadvisorapi "github.com/google/cadvisor/info/v1" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - apiequality "k8s.io/apimachinery/pkg/api/equality" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - kubetypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/client-go/tools/record" - "k8s.io/client-go/util/clock" - "k8s.io/client-go/util/flowcontrol" - "k8s.io/kubernetes/pkg/api/testapi" - "k8s.io/kubernetes/pkg/api/v1" - "k8s.io/kubernetes/pkg/apis/componentconfig" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" - "k8s.io/kubernetes/pkg/kubelet/images" - "k8s.io/kubernetes/pkg/kubelet/network" - nettest "k8s.io/kubernetes/pkg/kubelet/network/testing" - proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" - "k8s.io/kubernetes/pkg/kubelet/types" - uexec "k8s.io/kubernetes/pkg/util/exec" -) - -var testTempDir string - -func TestMain(m *testing.M) { - dir, err := ioutil.TempDir("", "dockertools") - if err != nil { - panic(err) - } - testTempDir = dir - - flag.Parse() - status := m.Run() - os.RemoveAll(testTempDir) - os.Exit(status) -} - -type fakeHTTP struct { - url string - err error -} - -func (f *fakeHTTP) Get(url string) (*http.Response, error) { - f.url = url - return nil, f.err -} - -type fakeImageManager struct{} - -func newFakeImageManager() images.ImageManager { - return &fakeImageManager{} -} - -func (m *fakeImageManager) EnsureImageExists(pod *v1.Pod, container *v1.Container, pullSecrets []v1.Secret) (string, string, error) { - return container.Image, "", nil -} - -func createTestDockerManager(fakeHTTPClient *fakeHTTP, fakeDocker *FakeDockerClient) (*DockerManager, *FakeDockerClient) { - if fakeHTTPClient == nil { - fakeHTTPClient = &fakeHTTP{} - } - if fakeDocker == nil { - fakeDocker = NewFakeDockerClient() - } - fakeRecorder := &record.FakeRecorder{} - containerRefManager := kubecontainer.NewRefManager() - networkPlugin, _ := network.InitNetworkPlugin( - []network.NetworkPlugin{}, - "", - nettest.NewFakeHost(nil), - componentconfig.HairpinNone, - "10.0.0.0/8", - network.UseDefaultMTU) - - dockerManager := NewFakeDockerManager( - fakeDocker, - fakeRecorder, - proberesults.NewManager(), - containerRefManager, - 
&cadvisorapi.MachineInfo{}, - "", - 0, 0, "", - &containertest.FakeOS{}, - networkPlugin, - &containertest.FakeRuntimeHelper{}, - fakeHTTPClient, - flowcontrol.NewBackOff(time.Second, 300*time.Second)) - - return dockerManager, fakeDocker -} - -func createTestDockerManagerWithFakeImageManager(fakeHTTPClient *fakeHTTP, fakeDocker *FakeDockerClient) (*DockerManager, *FakeDockerClient) { - dm, fd := createTestDockerManager(fakeHTTPClient, fakeDocker) - dm.imagePuller = newFakeImageManager() - return dm, fd -} - -func newTestDockerManagerWithRealImageManager() (*DockerManager, *FakeDockerClient) { - return createTestDockerManager(nil, nil) -} -func newTestDockerManagerWithHTTPClient(fakeHTTPClient *fakeHTTP) (*DockerManager, *FakeDockerClient) { - return createTestDockerManagerWithFakeImageManager(fakeHTTPClient, nil) -} - -func newTestDockerManagerWithVersion(version, apiVersion string) (*DockerManager, *FakeDockerClient) { - fakeDocker := NewFakeDockerClient().WithVersion(version, apiVersion) - return createTestDockerManagerWithFakeImageManager(nil, fakeDocker) -} - -func newTestDockerManager() (*DockerManager, *FakeDockerClient) { - return createTestDockerManagerWithFakeImageManager(nil, nil) -} - -func matchString(t *testing.T, pattern, str string) bool { - match, err := regexp.MatchString(pattern, str) - if err != nil { - t.Logf("unexpected error: %v", err) - } - return match -} - -func TestSetEntrypointAndCommand(t *testing.T) { - cases := []struct { - name string - container *v1.Container - envs []kubecontainer.EnvVar - expected *dockertypes.ContainerCreateConfig - }{ - { - name: "none", - container: &v1.Container{}, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{}, - }, - }, - { - name: "command", - container: &v1.Container{ - Command: []string{"foo", "bar"}, - }, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{ - Entrypoint: dockerstrslice.StrSlice([]string{"foo", "bar"}), - }, - }, - }, - { - name: "command expanded", - container: &v1.Container{ - Command: []string{"foo", "$(VAR_TEST)", "$(VAR_TEST2)"}, - }, - envs: []kubecontainer.EnvVar{ - { - Name: "VAR_TEST", - Value: "zoo", - }, - { - Name: "VAR_TEST2", - Value: "boo", - }, - }, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{ - Entrypoint: dockerstrslice.StrSlice([]string{"foo", "zoo", "boo"}), - }, - }, - }, - { - name: "args", - container: &v1.Container{ - Args: []string{"foo", "bar"}, - }, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{ - Cmd: []string{"foo", "bar"}, - }, - }, - }, - { - name: "args expanded", - container: &v1.Container{ - Args: []string{"zap", "$(VAR_TEST)", "$(VAR_TEST2)"}, - }, - envs: []kubecontainer.EnvVar{ - { - Name: "VAR_TEST", - Value: "hap", - }, - { - Name: "VAR_TEST2", - Value: "trap", - }, - }, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{ - Cmd: dockerstrslice.StrSlice([]string{"zap", "hap", "trap"}), - }, - }, - }, - { - name: "both", - container: &v1.Container{ - Command: []string{"foo"}, - Args: []string{"bar", "baz"}, - }, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{ - Entrypoint: dockerstrslice.StrSlice([]string{"foo"}), - Cmd: dockerstrslice.StrSlice([]string{"bar", "baz"}), - }, - }, - }, - { - name: "both expanded", - container: &v1.Container{ - Command: []string{"$(VAR_TEST2)--$(VAR_TEST)", "foo", "$(VAR_TEST3)"}, - Args: []string{"foo", "$(VAR_TEST)", "$(VAR_TEST2)"}, - }, - 
envs: []kubecontainer.EnvVar{ - { - Name: "VAR_TEST", - Value: "zoo", - }, - { - Name: "VAR_TEST2", - Value: "boo", - }, - { - Name: "VAR_TEST3", - Value: "roo", - }, - }, - expected: &dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{ - Entrypoint: dockerstrslice.StrSlice([]string{"boo--zoo", "foo", "roo"}), - Cmd: dockerstrslice.StrSlice([]string{"foo", "zoo", "boo"}), - }, - }, - }, - } - - for _, tc := range cases { - opts := &kubecontainer.RunContainerOptions{ - Envs: tc.envs, - } - - actualOpts := dockertypes.ContainerCreateConfig{ - Config: &dockercontainer.Config{}, - } - setEntrypointAndCommand(tc.container, opts, actualOpts) - - if e, a := tc.expected.Config.Entrypoint, actualOpts.Config.Entrypoint; !apiequality.Semantic.DeepEqual(e, a) { - t.Errorf("%v: unexpected entrypoint: expected %v, got %v", tc.name, e, a) - } - if e, a := tc.expected.Config.Cmd, actualOpts.Config.Cmd; !apiequality.Semantic.DeepEqual(e, a) { - t.Errorf("%v: unexpected command: expected %v, got %v", tc.name, e, a) - } - } -} - -// verifyPods returns true if the two pod slices are equal. -func verifyPods(a, b []*kubecontainer.Pod) bool { - if len(a) != len(b) { - return false - } - - // Sort the containers within a pod. - for i := range a { - sort.Sort(containersByID(a[i].Containers)) - } - for i := range b { - sort.Sort(containersByID(b[i].Containers)) - } - - // Sort the pods by UID. - sort.Sort(podsByID(a)) - sort.Sort(podsByID(b)) - - return reflect.DeepEqual(a, b) -} - -func TestGetPods(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - dockerContainers := []*FakeContainer{ - { - ID: "1111", - Name: "/k8s_foo_qux_new_1234_42", - }, - { - ID: "2222", - Name: "/k8s_bar_qux_new_1234_42", - }, - { - ID: "3333", - Name: "/k8s_bar_jlk_wen_5678_42", - }, - } - - // Convert the docker containers. This does not affect the test coverage - // because the conversion is tested separately in convert_test.go - containers := make([]*kubecontainer.Container, len(dockerContainers)) - for i := range containers { - c, err := toRuntimeContainer(&dockertypes.Container{ - ID: dockerContainers[i].ID, - Names: []string{dockerContainers[i].Name}, - }) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - containers[i] = c - } - - expected := []*kubecontainer.Pod{ - { - ID: kubetypes.UID("1234"), - Name: "qux", - Namespace: "new", - Containers: []*kubecontainer.Container{containers[0], containers[1]}, - }, - { - ID: kubetypes.UID("5678"), - Name: "jlk", - Namespace: "wen", - Containers: []*kubecontainer.Container{containers[2]}, - }, - } - - fakeDocker.SetFakeRunningContainers(dockerContainers) - actual, err := manager.GetPods(false) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - if !verifyPods(expected, actual) { - t.Errorf("expected %#v, got %#v", expected, actual) - } -} - -func TestListImages(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - dockerImages := []dockertypes.Image{{ID: "1111"}, {ID: "2222"}, {ID: "3333"}} - expected := sets.NewString([]string{"1111", "2222", "3333"}...) - - fakeDocker.Images = dockerImages - actualImages, err := manager.ListImages() - if err != nil { - t.Fatalf("unexpected error %v", err) - } - actual := sets.NewString() - for _, i := range actualImages { - actual.Insert(i.ID) - } - // We can compare the two sets directly because util.StringSet.List() - // returns a "sorted" list. 
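TestSetEntrypointAndCommand above pins down the mapping the manager used: the Kubernetes Command becomes the Docker Entrypoint, Args become Cmd, and $(VAR) references in both are expanded from the container's environment first. A minimal sketch of that expansion, assuming a simple map-backed environment and leaving unresolved references as-is; the kubelet's real expansion helper is not shown here.

package main

import (
	"fmt"
	"regexp"
)

var varRef = regexp.MustCompile(`\$\(([A-Za-z_][A-Za-z0-9_]*)\)`)

// expand replaces $(NAME) references with values from envs, mirroring the
// "both expanded" test case above; unknown references are left untouched.
func expand(args []string, envs map[string]string) []string {
	out := make([]string, len(args))
	for i, a := range args {
		out[i] = varRef.ReplaceAllStringFunc(a, func(ref string) string {
			name := varRef.FindStringSubmatch(ref)[1]
			if v, ok := envs[name]; ok {
				return v
			}
			return ref
		})
	}
	return out
}

func main() {
	envs := map[string]string{"VAR_TEST": "zoo", "VAR_TEST2": "boo", "VAR_TEST3": "roo"}
	entrypoint := expand([]string{"$(VAR_TEST2)--$(VAR_TEST)", "foo", "$(VAR_TEST3)"}, envs)
	cmd := expand([]string{"foo", "$(VAR_TEST)", "$(VAR_TEST2)"}, envs)
	fmt.Println(entrypoint) // [boo--zoo foo roo]
	fmt.Println(cmd)        // [foo zoo boo]
}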
- if !reflect.DeepEqual(expected.List(), actual.List()) { - t.Errorf("expected %#v, got %#v", expected.List(), actual.List()) - } -} - -func TestDeleteImage(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - fakeDocker.InjectImages([]dockertypes.Image{{ID: "1111", RepoTags: []string{"foo"}}}) - manager.RemoveImage(kubecontainer.ImageSpec{Image: "1111"}) - fakeDocker.AssertCallDetails(NewCalledDetail("inspect_image", nil), NewCalledDetail("remove_image", - []interface{}{"1111", dockertypes.ImageRemoveOptions{PruneChildren: true}})) -} - -func TestDeleteImageWithMultipleTags(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - fakeDocker.InjectImages([]dockertypes.Image{{ID: "1111", RepoTags: []string{"foo", "bar"}}}) - manager.RemoveImage(kubecontainer.ImageSpec{Image: "1111"}) - fakeDocker.AssertCallDetails(NewCalledDetail("inspect_image", nil), - NewCalledDetail("remove_image", []interface{}{"foo", dockertypes.ImageRemoveOptions{PruneChildren: true}}), - NewCalledDetail("remove_image", []interface{}{"bar", dockertypes.ImageRemoveOptions{PruneChildren: true}})) -} - -func TestKillContainerInPod(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - - pod := makePod("qux", nil) - containers := []*FakeContainer{ - { - ID: "1111", - Name: "/k8s_foo_qux_new_1234_42", - }, - { - ID: "2222", - Name: "/k8s_bar_qux_new_1234_42", - }, - } - containerToKill := containers[0] - containerToSpare := containers[1] - - fakeDocker.SetFakeRunningContainers(containers) - - if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container in pod.", nil); err != nil { - t.Errorf("unexpected error: %v", err) - } - // Assert the container has been stopped. - if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil { - t.Errorf("container was not stopped correctly: %v", err) - } - // Assert the container has been spared. - if err := fakeDocker.AssertStopped([]string{containerToSpare.ID}); err == nil { - t.Errorf("container unexpectedly stopped: %v", containerToSpare.ID) - } -} - -func TestKillContainerInPodWithPreStop(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - fakeDocker.ExecInspect = &dockertypes.ContainerExecInspect{ - Running: false, - ExitCode: 0, - } - expectedCmd := []string{"foo.sh", "bar"} - pod := makePod("qux", &v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "foo", - Lifecycle: &v1.Lifecycle{ - PreStop: &v1.Handler{ - Exec: &v1.ExecAction{ - Command: expectedCmd, - }, - }, - }, - }, - {Name: "bar"}}}) - - podString, err := runtime.Encode(testapi.Default.Codec(), pod) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - containers := []*FakeContainer{ - { - ID: "1111", - Name: "/k8s_foo_qux_new_1234_42", - Config: &dockercontainer.Config{ - Labels: map[string]string{ - kubernetesPodLabel: string(podString), - types.KubernetesContainerNameLabel: "foo", - }, - }, - }, - { - ID: "2222", - Name: "/k8s_bar_qux_new_1234_42", - }, - } - containerToKill := containers[0] - fakeDocker.SetFakeRunningContainers(containers) - - if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with preStop.", nil); err != nil { - t.Errorf("unexpected error: %v", err) - } - // Assert the container has been stopped. 
- if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil { - t.Errorf("container was not stopped correctly: %v", err) - } - verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"}) - if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) { - t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd) - } -} - -func TestKillContainerInPodWithError(t *testing.T) { - manager, fakeDocker := newTestDockerManager() - - pod := makePod("qux", nil) - containers := []*FakeContainer{ - { - ID: "1111", - Name: "/k8s_foo_qux_new_1234_42", - }, - { - ID: "2222", - Name: "/k8s_bar_qux_new_1234_42", - }, - } - fakeDocker.SetFakeRunningContainers(containers) - fakeDocker.InjectError("stop", fmt.Errorf("sample error")) - - if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with error.", nil); err == nil { - t.Errorf("expected error, found nil") - } -} - -func TestIsAExitError(t *testing.T) { - var err error - err = &dockerExitError{nil} - _, ok := err.(uexec.ExitError) - if !ok { - t.Error("couldn't cast dockerExitError to exec.ExitError") - } -} - -func generatePodInfraContainerHash(pod *v1.Pod) uint64 { - var ports []v1.ContainerPort - if pod.Spec.SecurityContext == nil || !pod.Spec.HostNetwork { - for _, container := range pod.Spec.Containers { - ports = append(ports, container.Ports...) - } - } - - container := &v1.Container{ - Name: PodInfraContainerName, - Image: "", - Ports: ports, - ImagePullPolicy: podInfraContainerImagePullPolicy, - } - return kubecontainer.HashContainerLegacy(container) -} - -// runSyncPod is a helper function to retrieve the running pods from the fake -// docker client and runs SyncPod for the given pod. -func runSyncPod(t *testing.T, dm *DockerManager, fakeDocker *FakeDockerClient, pod *v1.Pod, backOff *flowcontrol.Backoff, expectErr bool) kubecontainer.PodSyncResult { - podStatus, err := dm.GetPodStatus(pod.UID, pod.Name, pod.Namespace) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - fakeDocker.ClearCalls() - if backOff == nil { - backOff = flowcontrol.NewBackOff(time.Second, time.Minute) - } - // v1.PodStatus is not used in SyncPod now, pass in an empty one. - result := dm.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) - err = result.Error() - if err != nil && !expectErr { - t.Errorf("unexpected error: %v", err) - } else if err == nil && expectErr { - t.Errorf("expected error didn't occur") - } - return result -} - -func TestSyncPodCreateNetAndContainer(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - dm.podInfraContainerImage = "pod_infra_image" - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. 
- "create", "start", "inspect_container", - }) - fakeDocker.Lock() - - found := false - for _, c := range fakeDocker.RunningContainerList { - if c.Image == "pod_infra_image" && strings.HasPrefix(c.Names[0], "/k8s_POD") { - found = true - break - } - } - if !found { - t.Errorf("Custom pod infra container not found: %v", fakeDocker.RunningContainerList) - } - fakeDocker.Unlock() - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar"})) -} - -func TestSyncPodCreatesNetAndContainerPullsImage(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithRealImageManager() - dm.podInfraContainerImage = "foo/infra_image:v1" - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar", Image: "foo/something:v0", ImagePullPolicy: "IfNotPresent"}, - }, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "inspect_image", "pull", "inspect_image", "create", "start", "inspect_container", "inspect_container", - // Create container. - "inspect_image", "pull", "inspect_image", "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertImagesPulled([]string{"foo/infra_image:v1", "foo/something:v0"})) - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar"})) -} - -func TestSyncPodWithPodInfraCreatesContainer(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - }) - - fakeDocker.SetFakeRunningContainers([]*FakeContainer{{ - ID: "9876", - // Pod infra container. - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0", - }}) - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByName([]string{"bar"})) -} - -func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - pod := makePod("foo1", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar1"}, - }, - }) - fakeDocker.SetFakeRunningContainers([]*FakeContainer{{ - ID: "1234", - Name: "/k8s_bar1_foo1_new_12345678_0", - }}) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Kill the container since pod infra container is not running. - "stop", - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertStopped([]string{"1234"})) -} - -func TestSyncPodDeletesDuplicate(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - pod := makePod("bar", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "foo"}, - }, - }) - - fakeDocker.SetFakeRunningContainers([]*FakeContainer{ - { - ID: "1234", - Name: "/k8s_foo_bar_new_12345678_1111", - }, - { - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_bar_new_12345678_2222", - }, - { - ID: "4567", - Name: "/k8s_foo_bar_new_12345678_3333", - }}) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Kill the duplicated container. - "stop", - }) - // Expect one of the duplicates to be killed. 
- if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") { - t.Errorf("Wrong containers were stopped: %v", fakeDocker.Stopped) - } -} - -func TestSyncPodBadHash(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - }) - - fakeDocker.SetFakeRunningContainers([]*FakeContainer{ - { - ID: "1234", - Name: "/k8s_bar.1234_foo_new_12345678_42", - }, - { - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_42", - }}) - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Kill and restart the bad hash container. - "stop", "create", "start", "inspect_container", - }) - - if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil { - t.Errorf("%v", err) - } -} - -func TestSyncPodsUnhealthy(t *testing.T) { - const ( - unhealthyContainerID = "1234" - infraContainerID = "9876" - ) - dm, fakeDocker := newTestDockerManager() - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{{Name: "unhealthy"}}, - }) - - fakeDocker.SetFakeRunningContainers([]*FakeContainer{ - { - ID: unhealthyContainerID, - Name: "/k8s_unhealthy_foo_new_12345678_42", - }, - { - ID: infraContainerID, - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_42", - }}) - dm.livenessManager.Set(kubecontainer.DockerID(unhealthyContainerID).ContainerID(), proberesults.Failure, pod) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Kill the unhealthy container. - "stop", - // Restart the unhealthy container. - "create", "start", "inspect_container", - }) - - if err := fakeDocker.AssertStopped([]string{unhealthyContainerID}); err != nil { - t.Errorf("%v", err) - } -} - -func TestSyncPodsDoesNothing(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - container := v1.Container{Name: "bar"} - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - container, - }, - }) - fakeDocker.SetFakeRunningContainers([]*FakeContainer{ - { - ID: "1234", - Name: "/k8s_bar." + strconv.FormatUint(kubecontainer.HashContainerLegacy(&container), 16) + "_foo_new_12345678_0", - }, - { - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0", - }}) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{}) -} - -func TestSyncPodWithRestartPolicy(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - containers := []v1.Container{ - {Name: "succeeded"}, - {Name: "failed"}, - } - pod := makePod("foo", &v1.PodSpec{ - Containers: containers, - }) - dockerContainers := []*FakeContainer{ - { - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0", - StartedAt: time.Now(), - Running: true, - }, - { - ID: "1234", - Name: "/k8s_succeeded." + strconv.FormatUint(kubecontainer.HashContainerLegacy(&containers[0]), 16) + "_foo_new_12345678_0", - ExitCode: 0, - StartedAt: time.Now(), - FinishedAt: time.Now(), - }, - { - ID: "5678", - Name: "/k8s_failed." 
+ strconv.FormatUint(kubecontainer.HashContainerLegacy(&containers[1]), 16) + "_foo_new_12345678_0", - ExitCode: 42, - StartedAt: time.Now(), - FinishedAt: time.Now(), - }} - - tests := []struct { - policy v1.RestartPolicy - calls []string - created []string - stopped []string - }{ - { - v1.RestartPolicyAlways, - []string{ - // Restart both containers. - "create", "start", "inspect_container", "create", "start", "inspect_container", - }, - []string{"succeeded", "failed"}, - []string{}, - }, - { - v1.RestartPolicyOnFailure, - []string{ - // Restart the failed container. - "create", "start", "inspect_container", - }, - []string{"failed"}, - []string{}, - }, - { - v1.RestartPolicyNever, - []string{ - // Check the pod infra container. - "inspect_container", "inspect_container", - // Stop the last pod infra container. - "stop", - }, - []string{}, - []string{"9876"}, - }, - } - - for i, tt := range tests { - fakeDocker.SetFakeContainers(dockerContainers) - pod.Spec.RestartPolicy = tt.policy - runSyncPod(t, dm, fakeDocker, pod, nil, false) - // 'stop' is because the pod infra container is killed when no container is running. - verifyCalls(t, fakeDocker, tt.calls) - - if err := fakeDocker.AssertCreatedByName(tt.created); err != nil { - t.Errorf("case [%d]: %v", i, err) - } - if err := fakeDocker.AssertStopped(tt.stopped); err != nil { - t.Errorf("case [%d]: %v", i, err) - } - } -} - -func TestSyncPodBackoff(t *testing.T) { - var fakeClock = clock.NewFakeClock(time.Now()) - startTime := fakeClock.Now() - - dm, fakeDocker := newTestDockerManager() - containers := []v1.Container{ - {Name: "good"}, - {Name: "bad"}, - } - pod := makePod("podfoo", &v1.PodSpec{ - Containers: containers, - }) - - stableId := "k8s_bad." + strconv.FormatUint(kubecontainer.HashContainerLegacy(&containers[1]), 16) + "_podfoo_new_12345678" - dockerContainers := []*FakeContainer{ - { - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_podfoo_new_12345678_0", - StartedAt: startTime, - Running: true, - }, - { - ID: "1234", - Name: "/k8s_good." + strconv.FormatUint(kubecontainer.HashContainerLegacy(&containers[0]), 16) + "_podfoo_new_12345678_0", - StartedAt: startTime, - Running: true, - }, - { - ID: "5678", - Name: "/k8s_bad." 
+ strconv.FormatUint(kubecontainer.HashContainerLegacy(&containers[1]), 16) + "_podfoo_new_12345678_0", - ExitCode: 42, - StartedAt: startTime, - FinishedAt: fakeClock.Now(), - }, - } - - startCalls := []string{"create", "start", "inspect_container"} - backOffCalls := []string{} - startResult := &kubecontainer.SyncResult{Action: kubecontainer.StartContainer, Target: "bad", Error: nil, Message: ""} - backoffResult := &kubecontainer.SyncResult{Action: kubecontainer.StartContainer, Target: "bad", Error: kubecontainer.ErrCrashLoopBackOff, Message: ""} - tests := []struct { - tick int - backoff int - killDelay int - result []string - expectErr bool - }{ - {1, 1, 1, startCalls, false}, - {2, 2, 2, startCalls, false}, - {3, 2, 3, backOffCalls, true}, - {4, 4, 4, startCalls, false}, - {5, 4, 5, backOffCalls, true}, - {6, 4, 6, backOffCalls, true}, - {7, 4, 7, backOffCalls, true}, - {8, 8, 129, startCalls, false}, - {130, 1, 0, startCalls, false}, - } - - backOff := flowcontrol.NewBackOff(time.Second, time.Minute) - backOff.Clock = fakeClock - for _, c := range tests { - fakeDocker.SetFakeContainers(dockerContainers) - fakeClock.SetTime(startTime.Add(time.Duration(c.tick) * time.Second)) - - result := runSyncPod(t, dm, fakeDocker, pod, backOff, c.expectErr) - verifyCalls(t, fakeDocker, c.result) - - // Verify whether the correct sync pod result is generated - if c.expectErr { - verifySyncResults(t, []*kubecontainer.SyncResult{backoffResult}, result) - } else { - verifySyncResults(t, []*kubecontainer.SyncResult{startResult}, result) - } - - if backOff.Get(stableId) != time.Duration(c.backoff)*time.Second { - t.Errorf("At tick %s expected backoff=%s got=%s", time.Duration(c.tick)*time.Second, time.Duration(c.backoff)*time.Second, backOff.Get(stableId)) - } - - if len(fakeDocker.Created) > 0 { - // pretend kill the container - fakeDocker.Created = nil - dockerContainers[2].FinishedAt = startTime.Add(time.Duration(c.killDelay) * time.Second) - } - } -} - -func TestGetRestartCount(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - containerName := "bar" - pod := *makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: containerName}, - }, - RestartPolicy: "Always", - }) - pod.Status = v1.PodStatus{ - ContainerStatuses: []v1.ContainerStatus{ - { - Name: containerName, - RestartCount: 3, - }, - }, - } - - // Helper function for verifying the restart count. - verifyRestartCount := func(pod *v1.Pod, expectedCount int) { - runSyncPod(t, dm, fakeDocker, pod, nil, false) - status, err := dm.GetPodStatus(pod.UID, pod.Name, pod.Namespace) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - cs := status.FindContainerStatusByName(containerName) - if cs == nil { - t.Fatalf("Can't find status for container %q", containerName) - } - restartCount := cs.RestartCount - if restartCount != expectedCount { - t.Errorf("expected %d restart count, got %d", expectedCount, restartCount) - } - } - - killOneContainer := func(pod *v1.Pod) { - status, err := dm.GetPodStatus(pod.UID, pod.Name, pod.Namespace) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - cs := status.FindContainerStatusByName(containerName) - if cs == nil { - t.Fatalf("Can't find status for container %q", containerName) - } - dm.KillContainerInPod(cs.ID, &pod.Spec.Containers[0], pod, "test container restart count.", nil) - } - // Container "bar" starts the first time. - // TODO: container lists are expected to be sorted reversely by time. - // We should fix FakeDockerClient to sort the list before returning. 
- // (random-liu) Just partially sorted now. - verifyRestartCount(&pod, 0) - killOneContainer(&pod) - - // Poor container "bar" has been killed, and should be restarted with restart count 1 - verifyRestartCount(&pod, 1) - killOneContainer(&pod) - - // Poor container "bar" has been killed again, and should be restarted with restart count 2 - verifyRestartCount(&pod, 2) - killOneContainer(&pod) - - // Poor container "bar" has been killed again and again, and should be restarted with restart count 3 - verifyRestartCount(&pod, 3) - - // The oldest container has been garbage collected - exitedContainers := fakeDocker.ExitedContainerList - fakeDocker.ExitedContainerList = exitedContainers[:len(exitedContainers)-1] - verifyRestartCount(&pod, 3) - - // The last two oldest containers have been garbage collected - fakeDocker.ExitedContainerList = exitedContainers[:len(exitedContainers)-2] - verifyRestartCount(&pod, 3) - - // All exited containers have been garbage collected; the restart count should be obtained from the old API pod status - fakeDocker.ExitedContainerList = []dockertypes.Container{} - verifyRestartCount(&pod, 3) - killOneContainer(&pod) - - // Poor container "bar" has been killed again and again and again, and should be restarted with restart count 4 - verifyRestartCount(&pod, 4) -} - -func TestGetTerminationMessagePath(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - containers := []v1.Container{ - { - Name: "bar", - TerminationMessagePath: "/dev/somepath", - }, - } - pod := makePod("foo", &v1.PodSpec{ - Containers: containers, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - containerList := fakeDocker.RunningContainerList - if len(containerList) != 2 { - // One for infra container, one for container "bar" - t.Fatalf("unexpected container list length %d", len(containerList)) - } - inspectResult, err := fakeDocker.InspectContainer(containerList[0].ID) - if err != nil { - t.Fatalf("unexpected inspect error: %v", err) - } - containerInfo := getContainerInfoFromLabel(inspectResult.Config.Labels) - terminationMessagePath := containerInfo.TerminationMessagePath - if terminationMessagePath != containers[0].TerminationMessagePath { - t.Errorf("expected termination message path %s, got %s", containers[0].TerminationMessagePath, terminationMessagePath) - } -} - -func TestSyncPodWithPodInfraCreatesContainerCallsHandler(t *testing.T) { - fakeHTTPClient := &fakeHTTP{} - dm, fakeDocker := newTestDockerManagerWithHTTPClient(fakeHTTPClient) - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "bar", - Lifecycle: &v1.Lifecycle{ - PostStart: &v1.Handler{ - HTTPGet: &v1.HTTPGetAction{ - Host: "foo", - Port: intstr.FromInt(8080), - Path: "bar", - }, - }, - }, - }, - }, - }) - fakeDocker.SetFakeRunningContainers([]*FakeContainer{{ - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0", - }}) - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create container. - "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByName([]string{"bar"})) - - if fakeHTTPClient.url != "http://foo:8080/bar" { - t.Errorf("unexpected handler: %q", fakeHTTPClient.url) - } -} - -func TestSyncPodEventHandlerFails(t *testing.T) { - // Simulate HTTP failure.
- fakeHTTPClient := &fakeHTTP{err: fmt.Errorf("test error")} - dm, fakeDocker := newTestDockerManagerWithHTTPClient(fakeHTTPClient) - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar", - Lifecycle: &v1.Lifecycle{ - PostStart: &v1.Handler{ - HTTPGet: &v1.HTTPGetAction{ - Host: "does.no.exist", - Port: intstr.FromInt(8080), - Path: "bar", - }, - }, - }, - }, - }, - }) - - fakeDocker.SetFakeRunningContainers([]*FakeContainer{{ - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0", - }}) - runSyncPod(t, dm, fakeDocker, pod, nil, true) - - verifyCalls(t, fakeDocker, []string{ - // Create the container. - "create", "start", - // Kill the container since event handler fails. - "stop", - }) - - assert.NoError(t, fakeDocker.AssertStoppedByName([]string{"bar"})) -} - -type fakeReadWriteCloser struct{} - -func (*fakeReadWriteCloser) Read([]byte) (int, error) { return 0, nil } -func (*fakeReadWriteCloser) Write([]byte) (int, error) { return 0, nil } -func (*fakeReadWriteCloser) Close() error { return nil } - -func TestPortForwardNoSuchContainer(t *testing.T) { - dm, _ := newTestDockerManager() - - podName, podNamespace := "podName", "podNamespace" - err := dm.PortForward( - &kubecontainer.Pod{ - ID: "podID", - Name: podName, - Namespace: podNamespace, - Containers: nil, - }, - 5000, - // need a valid io.ReadWriteCloser here - &fakeReadWriteCloser{}, - ) - if err == nil { - t.Fatal("unexpected non-error") - } - expectedErr := noPodInfraContainerError(podName, podNamespace) - if !reflect.DeepEqual(err, expectedErr) { - t.Fatalf("expected %v, but saw %v", expectedErr, err) - } -} - -func TestSyncPodWithTerminationLog(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - // Set test pod container directory. - testPodContainerDir := "test/pod/container/dir" - dm.runtimeHelper.(*containertest.FakeRuntimeHelper).PodContainerDir = testPodContainerDir - container := v1.Container{ - Name: "bar", - TerminationMessagePath: "/dev/somepath", - } - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - container, - }, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) - - defer os.Remove(testPodContainerDir) - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - parts := strings.Split(newContainer.HostConfig.Binds[0], ":") - if !matchString(t, testPodContainerDir+"/[a-f0-9]", parts[0]) { - t.Errorf("unexpected host path: %s", parts[0]) - } - if parts[1] != "/dev/somepath" { - t.Errorf("unexpected container path: %s", parts[1]) - } -} - -func TestSyncPodWithHostNetwork(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - HostNetwork: true, - }) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", - // Create container. 
- "create", "start", "inspect_container", - }) - - assert.NoError(t, fakeDocker.AssertCreatedByNameWithOrder([]string{"POD", "bar"})) - - newContainer, err := fakeDocker.InspectContainer(fakeDocker.Created[1]) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - utsMode := newContainer.HostConfig.UTSMode - if utsMode != "host" { - t.Errorf("Pod with host network must have \"host\" utsMode, actual: \"%v\"", utsMode) - } -} - -func TestVerifyNonRoot(t *testing.T) { - dm, fakeDocker := newTestDockerManager() - - // setup test cases. - var rootUid int64 = 0 - var nonRootUid int64 = 1 - - tests := map[string]struct { - container *v1.Container - inspectImage *dockertypes.ImageInspect - expectedError string - }{ - // success cases - "non-root runAsUser": { - container: &v1.Container{ - Image: "foobar", - SecurityContext: &v1.SecurityContext{ - RunAsUser: &nonRootUid, - }, - }, - }, - "numeric non-root image user": { - container: &v1.Container{Image: "foobar"}, - inspectImage: &dockertypes.ImageInspect{ - ID: "foobar", - Config: &dockercontainer.Config{ - User: "1", - }, - }, - }, - "numeric non-root image user with gid": { - container: &v1.Container{Image: "foobar"}, - inspectImage: &dockertypes.ImageInspect{ - ID: "foobar", - Config: &dockercontainer.Config{ - User: "1:2", - }, - }, - }, - - // failure cases - "root runAsUser": { - container: &v1.Container{ - Image: "foobar", - SecurityContext: &v1.SecurityContext{ - RunAsUser: &rootUid, - }, - }, - expectedError: "container's runAsUser breaks non-root policy", - }, - "non-numeric image user": { - container: &v1.Container{Image: "foobar"}, - inspectImage: &dockertypes.ImageInspect{ - ID: "foobar", - Config: &dockercontainer.Config{ - User: "foo", - }, - }, - expectedError: "non-numeric user", - }, - "numeric root image user": { - container: &v1.Container{Image: "foobar"}, - inspectImage: &dockertypes.ImageInspect{ - ID: "foobar", - Config: &dockercontainer.Config{ - User: "0", - }, - }, - expectedError: "container has no runAsUser and image will run as root", - }, - "numeric root image user with gid": { - container: &v1.Container{Image: "foobar"}, - inspectImage: &dockertypes.ImageInspect{ - ID: "foobar", - Config: &dockercontainer.Config{ - User: "0:1", - }, - }, - expectedError: "container has no runAsUser and image will run as root", - }, - "nil image in inspect": { - container: &v1.Container{Image: "foobar"}, - inspectImage: nil, - expectedError: ImageNotFoundError{"foobar"}.Error(), - }, - "nil config in image inspect": { - container: &v1.Container{Image: "foobar"}, - inspectImage: &dockertypes.ImageInspect{ID: "foobar"}, - expectedError: "unable to inspect image", - }, - } - - for k, v := range tests { - fakeDocker.ResetImages() - if v.inspectImage != nil { - fakeDocker.InjectImageInspects([]dockertypes.ImageInspect{*v.inspectImage}) - } - err := dm.verifyNonRoot(v.container) - if v.expectedError == "" && err != nil { - t.Errorf("case[%q]: unexpected error: %v", k, err) - } - if v.expectedError != "" && !strings.Contains(err.Error(), v.expectedError) { - t.Errorf("case[%q]: expected: %q, got: %q", k, v.expectedError, err.Error()) - } - } -} - -func TestGetUserFromImageUser(t *testing.T) { - tests := map[string]struct { - input string - expect string - }{ - "no gid": { - input: "0", - expect: "0", - }, - "uid/gid": { - input: "0:1", - expect: "0", - }, - "empty input": { - input: "", - expect: "", - }, - "multiple spearators": { - input: "1:2:3", - expect: "1", - }, - "root username": { - input: "root:root", - expect: "root", - }, 
- "username": { - input: "test:test", - expect: "test", - }, - } - for k, v := range tests { - actual := GetUserFromImageUser(v.input) - if actual != v.expect { - t.Errorf("%s failed. Expected %s but got %s", k, v.expect, actual) - } - } -} - -func TestGetPidMode(t *testing.T) { - // test false - pod := &v1.Pod{} - pidMode := getPidMode(pod) - - if pidMode != "" { - t.Errorf("expected empty pid mode for pod but got %v", pidMode) - } - - // test true - pod.Spec.SecurityContext = &v1.PodSecurityContext{} - pod.Spec.HostPID = true - pidMode = getPidMode(pod) - if pidMode != "host" { - t.Errorf("expected host pid mode for pod but got %v", pidMode) - } -} - -func TestGetIPCMode(t *testing.T) { - // test false - pod := &v1.Pod{} - ipcMode := getIPCMode(pod) - - if ipcMode != "" { - t.Errorf("expected empty ipc mode for pod but got %v", ipcMode) - } - - // test true - pod.Spec.SecurityContext = &v1.PodSecurityContext{} - pod.Spec.HostIPC = true - ipcMode = getIPCMode(pod) - if ipcMode != "host" { - t.Errorf("expected host ipc mode for pod but got %v", ipcMode) - } -} - -func TestSyncPodWithPullPolicy(t *testing.T) { - dm, fakeDocker := newTestDockerManagerWithRealImageManager() - fakeDocker.InjectImages([]dockertypes.Image{{ID: "foo/existing_one:v1"}, {ID: "foo/want:latest"}}) - - dm.podInfraContainerImage = "foo/infra_image:v1" - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar", Image: "foo/pull_always_image:v1", ImagePullPolicy: v1.PullAlways}, - {Name: "bar2", Image: "foo/pull_if_not_present_image:v1", ImagePullPolicy: v1.PullIfNotPresent}, - {Name: "bar3", Image: "foo/existing_one:v1", ImagePullPolicy: v1.PullIfNotPresent}, - {Name: "bar4", Image: "foo/want:latest", ImagePullPolicy: v1.PullIfNotPresent}, - {Name: "bar5", Image: "foo/pull_never_image:v1", ImagePullPolicy: v1.PullNever}, - }, - }) - - expectedResults := []*kubecontainer.SyncResult{ - //Sync result for infra container - {kubecontainer.StartContainer, PodInfraContainerName, nil, ""}, - {kubecontainer.SetupNetwork, kubecontainer.GetPodFullName(pod), nil, ""}, - //Sync result for user containers - {kubecontainer.StartContainer, "bar", nil, ""}, - {kubecontainer.StartContainer, "bar2", nil, ""}, - {kubecontainer.StartContainer, "bar3", nil, ""}, - {kubecontainer.StartContainer, "bar4", nil, ""}, - {kubecontainer.StartContainer, "bar5", images.ErrImageNeverPull, - "Container image \"foo/pull_never_image:v1\" is not present with pull policy of Never"}, - } - - result := runSyncPod(t, dm, fakeDocker, pod, nil, true) - verifySyncResults(t, expectedResults, result) - - assert.NoError(t, fakeDocker.AssertImagesPulled([]string{"foo/infra_image:v1", "foo/pull_always_image:v1", "foo/pull_if_not_present_image:v1"})) - - fakeDocker.Lock() - defer fakeDocker.Unlock() - - if len(fakeDocker.Created) != 5 { - t.Errorf("unexpected containers created %v", fakeDocker.Created) - } -} - -// This test only covers SyncPod with PullImageFailure, CreateContainerFailure and StartContainerFailure. -// There are still quite a few failure cases not covered. -// TODO(random-liu): Better way to test the SyncPod failures. 
-func TestSyncPodWithFailure(t *testing.T) { - pod := makePod("foo", nil) - tests := map[string]struct { - container v1.Container - dockerError map[string]error - expected []*kubecontainer.SyncResult - }{ - "PullImageFailure": { - v1.Container{Name: "bar", Image: "foo/real_image:v1", ImagePullPolicy: v1.PullAlways}, - map[string]error{"pull": fmt.Errorf("can't pull image")}, - []*kubecontainer.SyncResult{{kubecontainer.StartContainer, "bar", images.ErrImagePull, "can't pull image"}}, - }, - "CreateContainerFailure": { - v1.Container{Name: "bar", Image: "foo/already_present:v2"}, - map[string]error{"create": fmt.Errorf("can't create container")}, - []*kubecontainer.SyncResult{{kubecontainer.StartContainer, "bar", kubecontainer.ErrRunContainer, "can't create container"}}, - }, - "StartContainerFailure": { - v1.Container{Name: "bar", Image: "foo/already_present:v2"}, - map[string]error{"start": fmt.Errorf("can't start container")}, - []*kubecontainer.SyncResult{{kubecontainer.StartContainer, "bar", kubecontainer.ErrRunContainer, "can't start container"}}, - }, - } - - for _, test := range tests { - dm, fakeDocker := newTestDockerManagerWithRealImageManager() - fakeDocker.InjectImages([]dockertypes.Image{{ID: test.container.Image}}) - // Pretend that the pod infra container has already been created, so that - // we can run the user containers. - fakeDocker.SetFakeRunningContainers([]*FakeContainer{{ - ID: "9876", - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0", - }}) - fakeDocker.InjectErrors(test.dockerError) - pod.Spec.Containers = []v1.Container{test.container} - result := runSyncPod(t, dm, fakeDocker, pod, nil, true) - verifySyncResults(t, test.expected, result) - } -} - -// Verify whether all the expected results appear exactly only once in real result. -func verifySyncResults(t *testing.T, expectedResults []*kubecontainer.SyncResult, realResult kubecontainer.PodSyncResult) { - if len(expectedResults) != len(realResult.SyncResults) { - t.Errorf("expected sync result number %d, got %d", len(expectedResults), len(realResult.SyncResults)) - for _, r := range expectedResults { - t.Errorf("expected result: %#v", r) - } - for _, r := range realResult.SyncResults { - t.Errorf("real result: %+v", r) - } - return - } - // The container start order is not fixed, because SyncPod() uses a map to store the containers to start. - // Here we should make sure each expected result appears only once in the real result. - for _, expectR := range expectedResults { - found := 0 - for _, realR := range realResult.SyncResults { - // For the same action of the same container, the result should be the same - if realR.Target == expectR.Target && realR.Action == expectR.Action { - // We use Contains() here because the message format may be changed, but at least we should - // make sure that the expected message is contained. 
- if realR.Error != expectR.Error || !strings.Contains(realR.Message, expectR.Message) { - t.Errorf("expected sync result %#v, got %+v", expectR, realR) - } - found++ - } - } - if found == 0 { - t.Errorf("not found expected result %#v", expectR) - } - if found > 1 { - t.Errorf("got %d duplicate expected result %#v", found, expectR) - } - } -} -func TestGetDockerOptSeparator(t *testing.T) { - dm110, _ := newTestDockerManagerWithVersion("1.10.1", "1.22") - dm111, _ := newTestDockerManagerWithVersion("1.11.0", "1.23") - - sep, err := dm110.getDockerOptSeparator() - require.NoError(t, err, "error getting docker opt separator for 1.10.1") - assert.Equal(t, SecurityOptSeparatorOld, sep, "security opt separator for docker 1.10") - - sep, err = dm111.getDockerOptSeparator() - require.NoError(t, err, "error getting docker opt separator for 1.11.1") - assert.Equal(t, SecurityOptSeparatorNew, sep, "security opt separator for docker 1.11") -} - -func TestFmtDockerOpts(t *testing.T) { - secOpts := []dockerOpt{{"seccomp", "unconfined", ""}} - - opts := FmtDockerOpts(secOpts, ':') - assert.Len(t, opts, 1) - assert.Contains(t, opts, "seccomp:unconfined", "Docker 1.10") - - opts = FmtDockerOpts(secOpts, '=') - assert.Len(t, opts, 1) - assert.Contains(t, opts, "seccomp=unconfined", "Docker 1.11") -} - -func TestCheckVersionCompatibility(t *testing.T) { - type test struct { - version string - compatible bool - } - tests := []test{ - // Minimum apiversion - {minimumDockerAPIVersion, true}, - // Invalid apiversion - {"invalid_api_version", false}, - // Older apiversion - {"1.0.0", false}, - // Newer apiversion - // NOTE(random-liu): We need to bump up the newer apiversion, - // if docker apiversion really reaches "9.9.9" someday. But I - // really doubt whether the test could live that long. - {"9.9.9", true}, - } - for i, tt := range tests { - testCase := fmt.Sprintf("test case #%d test version %q", i, tt.version) - dm, fakeDocker := newTestDockerManagerWithVersion("", tt.version) - err := dm.checkVersionCompatibility() - assert.Equal(t, tt.compatible, err == nil, testCase) - if tt.compatible == true { - // Get docker version error - fakeDocker.InjectError("version", fmt.Errorf("injected version error")) - err := dm.checkVersionCompatibility() - assert.NotNil(t, err, testCase+" version error check") - } - } -} - -func expectEvent(recorder *record.FakeRecorder, eventType, reason, msg string) error { - expected := fmt.Sprintf("%s %s %s", eventType, reason, msg) - var events []string - // Drain the event channel. - for { - select { - case event := <-recorder.Events: - if event == expected { - return nil - } - events = append(events, event) - default: - // No more events! 
- return fmt.Errorf("Event %q not found in [%s]", expected, strings.Join(events, ", ")) - } - } -} - -func TestNewDockerVersion(t *testing.T) { - cases := []struct { - value string - out string - err bool - }{ - {value: "1", err: true}, - {value: "1.8", err: true}, - {value: "1.8.1", out: "1.8.1"}, - {value: "1.8.1-fc21.other", out: "1.8.1-fc21.other"}, - {value: "1.8.1-beta.12", out: "1.8.1-beta.12"}, - } - for _, test := range cases { - v, err := newDockerVersion(test.value) - switch { - case err != nil && test.err: - continue - case (err != nil) != test.err: - t.Errorf("error for %q: expected %t, got %v", test.value, test.err, err) - continue - } - if v.String() != test.out { - t.Errorf("unexpected parsed version %q for %q", v, test.value) - } - } -} - -func TestDockerVersionComparison(t *testing.T) { - v, err := newDockerVersion("1.10.3") - assert.NoError(t, err) - for i, test := range []struct { - version string - compare int - err bool - }{ - {version: "1.9.2", compare: 1}, - {version: "1.9.2-rc2", compare: 1}, - {version: "1.10.3", compare: 0}, - {version: "1.10.3-rc3", compare: 1}, - {version: "1.10.4", compare: -1}, - {version: "1.10.4-rc1", compare: -1}, - {version: "1.11.1", compare: -1}, - {version: "1.11.1-rc4", compare: -1}, - {version: "invalid", err: true}, - } { - testCase := fmt.Sprintf("test case #%d test version %q", i, test.version) - res, err := v.Compare(test.version) - assert.Equal(t, test.err, err != nil, testCase) - if !test.err { - assert.Equal(t, test.compare, res, testCase) - } - } -} - -func TestVersion(t *testing.T) { - expectedVersion := "1.8.1" - expectedAPIVersion := "1.20" - dm, _ := newTestDockerManagerWithVersion(expectedVersion, expectedAPIVersion) - version, err := dm.Version() - if err != nil { - t.Errorf("got error while getting docker server version - %v", err) - } - if e, a := expectedVersion, version.String(); e != a { - t.Errorf("expect docker server version %q, got %q", e, a) - } - - apiVersion, err := dm.APIVersion() - if err != nil { - t.Errorf("got error while getting docker api version - %v", err) - } - if e, a := expectedAPIVersion, apiVersion.String(); e != a { - t.Errorf("expect docker api version %q, got %q", e, a) - } -} - -func TestGetPodStatusNoSuchContainer(t *testing.T) { - const ( - noSuchContainerID = "nosuchcontainer" - infraContainerID = "9876" - ) - dm, fakeDocker := newTestDockerManager() - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{{Name: "nosuchcontainer"}}, - }) - - fakeDocker.SetFakeContainers([]*FakeContainer{ - { - ID: noSuchContainerID, - Name: "/k8s_nosuchcontainer_foo_new_12345678_42", - ExitCode: 0, - StartedAt: time.Now(), - FinishedAt: time.Now(), - Running: false, - }, - { - ID: infraContainerID, - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_42", - ExitCode: 0, - StartedAt: time.Now(), - FinishedAt: time.Now(), - Running: false, - }, - }) - fakeDocker.InjectErrors(map[string]error{"inspect_container": fmt.Errorf("Error: No such container: %s", noSuchContainerID)}) - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - // Verify that we will try to start new contrainers even if the inspections - // failed. - verifyCalls(t, fakeDocker, []string{ - // Inspect dead infra container for possible network teardown - "inspect_container", - // Start a new infra container. - "create", "start", "inspect_container", "inspect_container", - // Start a new container. 
- "create", "start", "inspect_container", - }) -} - -func TestSyncPodDeadInfraContainerTeardown(t *testing.T) { - const ( - noSuchContainerID = "nosuchcontainer" - infraContainerID = "9876" - ) - dm, fakeDocker := newTestDockerManager() - dm.podInfraContainerImage = "pod_infra_image" - ctrl := gomock.NewController(t) - defer ctrl.Finish() - fnp := nettest.NewMockNetworkPlugin(ctrl) - dm.network = network.NewPluginManager(fnp) - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{{Name: noSuchContainerID}}, - }) - - fakeDocker.SetFakeContainers([]*FakeContainer{ - { - ID: infraContainerID, - Name: "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_42", - ExitCode: 0, - StartedAt: time.Now(), - FinishedAt: time.Now(), - Running: false, - }, - }) - - // Can be called multiple times due to GetPodStatus - fnp.EXPECT().Name().Return("someNetworkPlugin").AnyTimes() - fnp.EXPECT().TearDownPod("new", "foo", gomock.Any()).Return(nil) - fnp.EXPECT().GetPodNetworkStatus("new", "foo", gomock.Any()).Return(&network.PodNetworkStatus{IP: net.ParseIP("1.1.1.1")}, nil).AnyTimes() - fnp.EXPECT().SetUpPod("new", "foo", gomock.Any()).Return(nil) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - - // Verify that we will try to start new contrainers even if the inspections - // failed. - verifyCalls(t, fakeDocker, []string{ - // Inspect dead infra container for possible network teardown - "inspect_container", - // Start a new infra container. - "create", "start", "inspect_container", "inspect_container", - // Start a new container. - "create", "start", "inspect_container", - }) -} - -func TestPruneInitContainers(t *testing.T) { - dm, fake := newTestDockerManager() - pod := makePod("", &v1.PodSpec{ - InitContainers: []v1.Container{ - {Name: "init1"}, - {Name: "init2"}, - }, - }) - status := &kubecontainer.PodStatus{ - ContainerStatuses: []*kubecontainer.ContainerStatus{ - {Name: "init2", ID: kubecontainer.ContainerID{ID: "init2-new-1"}, State: kubecontainer.ContainerStateExited}, - {Name: "init1", ID: kubecontainer.ContainerID{ID: "init1-new-1"}, State: kubecontainer.ContainerStateExited}, - {Name: "init1", ID: kubecontainer.ContainerID{ID: "init1-new-2"}, State: kubecontainer.ContainerStateExited}, - {Name: "init1", ID: kubecontainer.ContainerID{ID: "init1-old-1"}, State: kubecontainer.ContainerStateExited}, - {Name: "init2", ID: kubecontainer.ContainerID{ID: "init2-old-1"}, State: kubecontainer.ContainerStateExited}, - }, - } - fake.ExitedContainerList = []dockertypes.Container{ - {ID: "init1-new-1"}, - {ID: "init1-new-2"}, - {ID: "init1-old-1"}, - {ID: "init2-new-1"}, - {ID: "init2-old-1"}, - } - keep := map[kubecontainer.DockerID]int{} - dm.pruneInitContainersBeforeStart(pod, status, keep) - sort.Sort(sort.StringSlice(fake.Removed)) - if !reflect.DeepEqual([]string{"init1-new-2", "init1-old-1", "init2-old-1"}, fake.Removed) { - t.Fatal(fake.Removed) - } -} - -func TestSyncPodGetsPodIPFromNetworkPlugin(t *testing.T) { - const ( - containerID = "123" - infraContainerID = "9876" - fakePodIP = "10.10.10.10" - ) - dm, fakeDocker := newTestDockerManager() - dm.podInfraContainerImage = "pod_infra_image" - ctrl := gomock.NewController(t) - defer ctrl.Finish() - fnp := nettest.NewMockNetworkPlugin(ctrl) - dm.network = network.NewPluginManager(fnp) - - pod := makePod("foo", &v1.PodSpec{ - Containers: []v1.Container{ - {Name: "bar"}, - }, - }) - - // Can be called multiple times due to GetPodStatus - fnp.EXPECT().Name().Return("someNetworkPlugin").AnyTimes() - 
fnp.EXPECT().GetPodNetworkStatus("new", "foo", gomock.Any()).Return(&network.PodNetworkStatus{IP: net.ParseIP(fakePodIP)}, nil).AnyTimes() - fnp.EXPECT().SetUpPod("new", "foo", gomock.Any()).Return(nil) - - runSyncPod(t, dm, fakeDocker, pod, nil, false) - verifyCalls(t, fakeDocker, []string{ - // Create pod infra container. - "create", "start", "inspect_container", "inspect_container", - // Create container. - "create", "start", "inspect_container", - }) -} - -// only test conditions "if inspect == nil || inspect.Config == nil || inspect.Config.Labels == nil" now -func TestContainerAndPodFromLabels(t *testing.T) { - tests := []struct { - inspect *dockertypes.ContainerJSON - expectedError error - }{ - { - inspect: nil, - expectedError: errNoPodOnContainer, - }, - { - inspect: &dockertypes.ContainerJSON{}, - expectedError: errNoPodOnContainer, - }, - { - inspect: &dockertypes.ContainerJSON{ - Config: &dockercontainer.Config{ - Hostname: "foo", - }, - }, - expectedError: errNoPodOnContainer, - }, - } - - for k, v := range tests { - pod, container, err := containerAndPodFromLabels(v.inspect) - if pod != nil || container != nil || err != v.expectedError { - t.Errorf("case[%q]: expected: nil, nil, %v, got: %v, %v, %v", k, v.expectedError, pod, container, err) - } - } -} - -func makePod(name string, spec *v1.PodSpec) *v1.Pod { - if spec == nil { - spec = &v1.PodSpec{Containers: []v1.Container{{Name: "foo"}, {Name: "bar"}}} - } - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "12345678", - Name: name, - Namespace: "new", - }, - Spec: *spec, - } - return pod -} diff --git a/pkg/kubelet/dockertools/docker_manager_unsupported.go b/pkg/kubelet/dockertools/docker_manager_unsupported.go index 9ef3683ab0e..fe3100be9ff 100644 --- a/pkg/kubelet/dockertools/docker_manager_unsupported.go +++ b/pkg/kubelet/dockertools/docker_manager_unsupported.go @@ -18,39 +18,6 @@ limitations under the License. package dockertools -import ( - dockertypes "github.com/docker/engine-api/types" - dockercontainer "github.com/docker/engine-api/types/container" - - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" -) - -// These two functions are OS specific (for now at least) -func updateHostConfig(hc *dockercontainer.HostConfig, opts *kubecontainer.RunContainerOptions) { -} - func DefaultMemorySwap() int64 { return -1 } - -func getContainerIP(container *dockertypes.ContainerJSON) string { - return "" -} - -func getNetworkingMode() string { - return "" -} - -func containerProvidesPodIP(containerName string) bool { - return false -} - -func containerIsNetworked(containerName string) bool { - return containerName == PodInfraContainerName -} - -// Returns nil as both Seccomp and AppArmor security options are not valid on Windows -func (dm *DockerManager) getSecurityOpts(pod *v1.Pod, ctrName string) ([]dockerOpt, error) { - return nil, nil -} diff --git a/pkg/kubelet/dockertools/docker_manager_windows.go b/pkg/kubelet/dockertools/docker_manager_windows.go index 8b29d964e19..1246734b266 100644 --- a/pkg/kubelet/dockertools/docker_manager_windows.go +++ b/pkg/kubelet/dockertools/docker_manager_windows.go @@ -18,63 +18,6 @@ limitations under the License. 
package dockertools -import ( - "os" - - dockertypes "github.com/docker/engine-api/types" - dockercontainer "github.com/docker/engine-api/types/container" - - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" -) - -// These two functions are OS specific (for now at least) -func updateHostConfig(hc *dockercontainer.HostConfig, opts *kubecontainer.RunContainerOptions) { - // There is no /etc/resolv.conf in Windows, DNS and DNSSearch options would have to be passed to Docker runtime instead - hc.DNS = opts.DNS - hc.DNSSearch = opts.DNSSearch - - // MemorySwap == -1 is not currently supported in Docker 1.14 on Windows - // https://github.com/docker/docker/blob/master/daemon/daemon_windows.go#L175 - hc.Resources.MemorySwap = 0 -} - func DefaultMemorySwap() int64 { return 0 } - -func getContainerIP(container *dockertypes.ContainerJSON) string { - if container.NetworkSettings != nil { - for _, network := range container.NetworkSettings.Networks { - if network.IPAddress != "" { - return network.IPAddress - } - } - } - return "" -} - -func getNetworkingMode() string { - // Allow override via env variable. Otherwise, use a default "kubenet" network - netMode := os.Getenv("CONTAINER_NETWORK") - if netMode == "" { - netMode = "kubenet" - } - return netMode -} - -// Infrastructure containers are not supported on Windows. For this reason, we -// make sure to not grab the infra container's IP for the pod. -func containerProvidesPodIP(containerName string) bool { - return containerName != PodInfraContainerName -} - -// All containers in Windows need networking setup/teardown -func containerIsNetworked(containerName string) bool { - return true -} - -// Returns nil as both Seccomp and AppArmor security options are not valid on Windows -func (dm *DockerManager) getSecurityOpts(pod *v1.Pod, ctrName string) ([]dockerOpt, error) { - return nil, nil -} diff --git a/pkg/kubelet/dockertools/docker_test.go b/pkg/kubelet/dockertools/docker_test.go index 87f873fdd2f..adf636b2f72 100644 --- a/pkg/kubelet/dockertools/docker_test.go +++ b/pkg/kubelet/dockertools/docker_test.go @@ -23,26 +23,16 @@ import ( "math/rand" "path" "reflect" - "sort" - "strconv" "strings" "testing" "github.com/docker/docker/pkg/jsonmessage" dockertypes "github.com/docker/engine-api/types" - dockernat "github.com/docker/go-connections/nat" - cadvisorapi "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/record" "k8s.io/kubernetes/pkg/api/v1" - "k8s.io/kubernetes/pkg/apis/componentconfig" "k8s.io/kubernetes/pkg/credentialprovider" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" "k8s.io/kubernetes/pkg/kubelet/images" - "k8s.io/kubernetes/pkg/kubelet/network" - nettest "k8s.io/kubernetes/pkg/kubelet/network/testing" hashutil "k8s.io/kubernetes/pkg/util/hash" ) @@ -716,272 +706,6 @@ func TestGetImageRef(t *testing.T) { } } -type podsByID []*kubecontainer.Pod - -func (b podsByID) Len() int { return len(b) } -func (b podsByID) Swap(i, j int) { b[i], b[j] = b[j], b[i] } -func (b podsByID) Less(i, j int) bool { return b[i].ID < b[j].ID } - -type containersByID []*kubecontainer.Container - -func (b containersByID) Len() int { return len(b) } -func (b containersByID) Swap(i, j int) { b[i], b[j] = b[j], b[i] } -func (b containersByID) Less(i, j int) bool { return b[i].ID.ID < b[j].ID.ID } - -func TestFindContainersByPod(t *testing.T) { - tests := []struct { - 
runningContainerList []dockertypes.Container - exitedContainerList []dockertypes.Container - all bool - expectedPods []*kubecontainer.Pod - }{ - - { - []dockertypes.Container{ - { - ID: "foobar", - Names: []string{"/k8s_foobar.1234_qux_ns_1234_42"}, - }, - { - ID: "barbar", - Names: []string{"/k8s_barbar.1234_qux_ns_2343_42"}, - }, - { - ID: "baz", - Names: []string{"/k8s_baz.1234_qux_ns_1234_42"}, - }, - }, - []dockertypes.Container{ - { - ID: "barfoo", - Names: []string{"/k8s_barfoo.1234_qux_ns_1234_42"}, - }, - { - ID: "bazbaz", - Names: []string{"/k8s_bazbaz.1234_qux_ns_5678_42"}, - }, - }, - false, - []*kubecontainer.Pod{ - { - ID: "1234", - Name: "qux", - Namespace: "ns", - Containers: []*kubecontainer.Container{ - { - ID: kubecontainer.DockerID("foobar").ContainerID(), - Name: "foobar", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - { - ID: kubecontainer.DockerID("baz").ContainerID(), - Name: "baz", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - }, - }, - { - ID: "2343", - Name: "qux", - Namespace: "ns", - Containers: []*kubecontainer.Container{ - { - ID: kubecontainer.DockerID("barbar").ContainerID(), - Name: "barbar", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - }, - }, - }, - }, - { - []dockertypes.Container{ - { - ID: "foobar", - Names: []string{"/k8s_foobar.1234_qux_ns_1234_42"}, - }, - { - ID: "barbar", - Names: []string{"/k8s_barbar.1234_qux_ns_2343_42"}, - }, - { - ID: "baz", - Names: []string{"/k8s_baz.1234_qux_ns_1234_42"}, - }, - }, - []dockertypes.Container{ - { - ID: "barfoo", - Names: []string{"/k8s_barfoo.1234_qux_ns_1234_42"}, - }, - { - ID: "bazbaz", - Names: []string{"/k8s_bazbaz.1234_qux_ns_5678_42"}, - }, - }, - true, - []*kubecontainer.Pod{ - { - ID: "1234", - Name: "qux", - Namespace: "ns", - Containers: []*kubecontainer.Container{ - { - ID: kubecontainer.DockerID("foobar").ContainerID(), - Name: "foobar", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - { - ID: kubecontainer.DockerID("barfoo").ContainerID(), - Name: "barfoo", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - { - ID: kubecontainer.DockerID("baz").ContainerID(), - Name: "baz", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - }, - }, - { - ID: "2343", - Name: "qux", - Namespace: "ns", - Containers: []*kubecontainer.Container{ - { - ID: kubecontainer.DockerID("barbar").ContainerID(), - Name: "barbar", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - }, - }, - { - ID: "5678", - Name: "qux", - Namespace: "ns", - Containers: []*kubecontainer.Container{ - { - ID: kubecontainer.DockerID("bazbaz").ContainerID(), - Name: "bazbaz", - Hash: 0x1234, - State: kubecontainer.ContainerStateUnknown, - }, - }, - }, - }, - }, - { - []dockertypes.Container{}, - []dockertypes.Container{}, - true, - nil, - }, - } - fakeClient := NewFakeDockerClient() - np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", nettest.NewFakeHost(nil), componentconfig.HairpinNone, "10.0.0.0/8", network.UseDefaultMTU) - // image back-off is set to nil, this test should not pull images - containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, &cadvisorapi.MachineInfo{}, "", 0, 0, "", &containertest.FakeOS{}, np, nil, nil, nil) - for i, test := range tests { - fakeClient.RunningContainerList = test.runningContainerList - fakeClient.ExitedContainerList = test.exitedContainerList - - result, _ := containerManager.GetPods(test.all) - for i := range result { 
- sort.Sort(containersByID(result[i].Containers)) - } - for i := range test.expectedPods { - sort.Sort(containersByID(test.expectedPods[i].Containers)) - } - sort.Sort(podsByID(result)) - sort.Sort(podsByID(test.expectedPods)) - if !reflect.DeepEqual(test.expectedPods, result) { - t.Errorf("%d: expected: %#v, saw: %#v", i, test.expectedPods, result) - } - } -} - -func TestMakePortsAndBindings(t *testing.T) { - portMapping := func(container, host int, protocol v1.Protocol, ip string) kubecontainer.PortMapping { - return kubecontainer.PortMapping{ - ContainerPort: container, - HostPort: host, - Protocol: protocol, - HostIP: ip, - } - } - - portBinding := func(port, ip string) dockernat.PortBinding { - return dockernat.PortBinding{ - HostPort: port, - HostIP: ip, - } - } - - ports := []kubecontainer.PortMapping{ - portMapping(80, 8080, "", "127.0.0.1"), - portMapping(443, 443, "tcp", ""), - portMapping(444, 444, "udp", ""), - portMapping(445, 445, "foobar", ""), - portMapping(443, 446, "tcp", ""), - portMapping(443, 446, "udp", ""), - } - - exposedPorts, bindings := makePortsAndBindings(ports) - - // Count the expected exposed ports and bindings - expectedExposedPorts := map[string]struct{}{} - - for _, binding := range ports { - dockerKey := strconv.Itoa(binding.ContainerPort) + "/" + string(binding.Protocol) - expectedExposedPorts[dockerKey] = struct{}{} - } - - // Should expose the right ports in docker - if len(expectedExposedPorts) != len(exposedPorts) { - t.Errorf("Unexpected ports and bindings, %#v %#v %#v", ports, exposedPorts, bindings) - } - - // Construct expected bindings - expectPortBindings := map[string][]dockernat.PortBinding{ - "80/tcp": { - portBinding("8080", "127.0.0.1"), - }, - "443/tcp": { - portBinding("443", ""), - portBinding("446", ""), - }, - "443/udp": { - portBinding("446", ""), - }, - "444/udp": { - portBinding("444", ""), - }, - "445/tcp": { - portBinding("445", ""), - }, - } - - // iterate the bindings by dockerPort, and check its portBindings - for dockerPort, portBindings := range bindings { - switch dockerPort { - case "80/tcp", "443/tcp", "443/udp", "444/udp", "445/tcp": - if !reflect.DeepEqual(expectPortBindings[string(dockerPort)], portBindings) { - t.Errorf("Unexpected portbindings for %#v, expected: %#v, but got: %#v", - dockerPort, expectPortBindings[string(dockerPort)], portBindings) - } - default: - t.Errorf("Unexpected docker port: %#v with portbindings: %#v", dockerPort, portBindings) - } - } -} - const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" func randStringBytes(n int) string { diff --git a/pkg/kubelet/dockertools/fake_manager.go b/pkg/kubelet/dockertools/fake_manager.go deleted file mode 100644 index 83f4d35fbe6..00000000000 --- a/pkg/kubelet/dockertools/fake_manager.go +++ /dev/null @@ -1,78 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License.
-*/ - -package dockertools - -import ( - cadvisorapi "github.com/google/cadvisor/info/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/record" - "k8s.io/client-go/util/flowcontrol" - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - "k8s.io/kubernetes/pkg/kubelet/network" - proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" - kubetypes "k8s.io/kubernetes/pkg/kubelet/types" - "k8s.io/kubernetes/pkg/kubelet/util/cache" - "k8s.io/kubernetes/pkg/util/oom" - "k8s.io/kubernetes/pkg/util/procfs" -) - -func NewFakeDockerManager( - client DockerInterface, - recorder record.EventRecorder, - livenessManager proberesults.Manager, - containerRefManager *kubecontainer.RefManager, - machineInfo *cadvisorapi.MachineInfo, - podInfraContainerImage string, - qps float32, - burst int, - containerLogsDir string, - osInterface kubecontainer.OSInterface, - networkPlugin network.NetworkPlugin, - runtimeHelper kubecontainer.RuntimeHelper, - httpClient kubetypes.HttpGetter, imageBackOff *flowcontrol.Backoff) *DockerManager { - - fakeOOMAdjuster := oom.NewFakeOOMAdjuster() - fakeProcFs := procfs.NewFakeProcFS() - fakePodGetter := &fakePodGetter{} - dm := NewDockerManager(client, recorder, livenessManager, containerRefManager, fakePodGetter, machineInfo, podInfraContainerImage, qps, - burst, containerLogsDir, osInterface, networkPlugin, runtimeHelper, httpClient, &NativeExecHandler{}, - fakeOOMAdjuster, fakeProcFs, false, imageBackOff, false, false, true, "/var/lib/kubelet/seccomp") - dm.dockerPuller = &FakeDockerPuller{client: client} - - // ttl of version cache is set to 0 so we always call version api directly in tests. - dm.versionCache = cache.NewObjectCache( - func() (interface{}, error) { - return dm.getVersionInfo() - }, - 0, - ) - return dm -} - -type fakePodGetter struct { - pods map[types.UID]*v1.Pod -} - -func newFakePodGetter() *fakePodGetter { - return &fakePodGetter{make(map[types.UID]*v1.Pod)} -} - -func (f *fakePodGetter) GetPodByUID(uid types.UID) (*v1.Pod, bool) { - pod, found := f.pods[uid] - return pod, found -} diff --git a/pkg/kubelet/dockertools/images.go b/pkg/kubelet/dockertools/images.go deleted file mode 100644 index e87c30fcf81..00000000000 --- a/pkg/kubelet/dockertools/images.go +++ /dev/null @@ -1,102 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "fmt" - "sync" - - "github.com/golang/glog" - - dockertypes "github.com/docker/engine-api/types" - runtime "k8s.io/kubernetes/pkg/kubelet/container" -) - -// imageStatsProvider exposes stats about all images currently available. -type imageStatsProvider struct { - sync.Mutex - // layers caches the current layers, key is the layer ID. - layers map[string]*dockertypes.ImageHistory - // imageToLayerIDs maps image to its layer IDs. 
- imageToLayerIDs map[string][]string - // Docker remote API client - c DockerInterface -} - -func newImageStatsProvider(c DockerInterface) *imageStatsProvider { - return &imageStatsProvider{ - layers: make(map[string]*dockertypes.ImageHistory), - imageToLayerIDs: make(map[string][]string), - c: c, - } -} - -func (isp *imageStatsProvider) ImageStats() (*runtime.ImageStats, error) { - images, err := isp.c.ListImages(dockertypes.ImageListOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to list docker images - %v", err) - } - // Take the lock to protect the cache - isp.Lock() - defer isp.Unlock() - // Create new cache each time, this is a little more memory consuming, but: - // * ImageStats is only called every 10 seconds - // * We use pointers and reference to copy cache elements. - // The memory usage should be acceptable. - // TODO(random-liu): Add more logic to implement in place cache update. - newLayers := make(map[string]*dockertypes.ImageHistory) - newImageToLayerIDs := make(map[string][]string) - for _, image := range images { - layerIDs, ok := isp.imageToLayerIDs[image.ID] - if !ok { - // Get information about the various layers of the given docker image. - history, err := isp.c.ImageHistory(image.ID) - if err != nil { - // Skip the image and inspect again in next ImageStats if the image is still there - glog.V(2).Infof("failed to get history of docker image %+v - %v", image, err) - continue - } - // Cache each layer - for i := range history { - layer := &history[i] - key := layer.ID - // Some of the layers are empty. - // We are hoping that these layers are unique to each image. - // Still keying with the CreatedBy field to be safe. - if key == "" || key == "" { - key = key + layer.CreatedBy - } - layerIDs = append(layerIDs, key) - newLayers[key] = layer - } - } else { - for _, layerID := range layerIDs { - newLayers[layerID] = isp.layers[layerID] - } - } - newImageToLayerIDs[image.ID] = layerIDs - } - ret := &runtime.ImageStats{} - // Calculate the total storage bytes - for _, layer := range newLayers { - ret.TotalStorageBytes += uint64(layer.Size) - } - // Update current cache - isp.layers = newLayers - isp.imageToLayerIDs = newImageToLayerIDs - return ret, nil -} diff --git a/pkg/kubelet/dockertools/images_test.go b/pkg/kubelet/dockertools/images_test.go deleted file mode 100644 index 08afd6357de..00000000000 --- a/pkg/kubelet/dockertools/images_test.go +++ /dev/null @@ -1,334 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package dockertools - -import ( - "testing" - - dockertypes "github.com/docker/engine-api/types" - "github.com/stretchr/testify/assert" -) - -func TestImageStatsNoImages(t *testing.T) { - fakeDockerClient := NewFakeDockerClient().WithVersion("1.2.3", "1.2") - isp := newImageStatsProvider(fakeDockerClient) - st, err := isp.ImageStats() - as := assert.New(t) - as.NoError(err) - as.NoError(fakeDockerClient.AssertCalls([]string{"list_images"})) - as.Equal(st.TotalStorageBytes, uint64(0)) -} - -func TestImageStatsWithImages(t *testing.T) { - fakeDockerClient := NewFakeDockerClient().WithVersion("1.2.3", "1.2") - fakeHistoryData := map[string][]dockertypes.ImageHistory{ - "busybox": { - { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - { - ID: "0123457", - CreatedBy: "duplicate", - Size: 200, - }, - { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - }, - "kubelet": { - { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - "busybox-new": { - { - ID: "01234567", - CreatedBy: "foo", - Size: 100, - }, - { - ID: "0123457", - CreatedBy: "duplicate", - Size: 200, - }, - { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - }, - } - fakeDockerClient.InjectImageHistory(fakeHistoryData) - fakeDockerClient.InjectImages([]dockertypes.Image{ - { - ID: "busybox", - }, - { - ID: "kubelet", - }, - { - ID: "busybox-new", - }, - }) - isp := newImageStatsProvider(fakeDockerClient) - st, err := isp.ImageStats() - as := assert.New(t) - as.NoError(err) - as.NoError(fakeDockerClient.AssertCalls([]string{"list_images", "image_history", "image_history", "image_history"})) - const expectedOutput uint64 = 1300 - as.Equal(expectedOutput, st.TotalStorageBytes, "expected %d, got %d", expectedOutput, st.TotalStorageBytes) -} - -func TestImageStatsWithCachedImages(t *testing.T) { - for _, test := range []struct { - oldLayers map[string]*dockertypes.ImageHistory - oldImageToLayerIDs map[string][]string - images []dockertypes.Image - history map[string][]dockertypes.ImageHistory - expectedCalls []string - expectedLayers map[string]*dockertypes.ImageHistory - expectedImageToLayerIDs map[string][]string - expectedTotalStorageSize uint64 - }{ - { - // No cache - oldLayers: make(map[string]*dockertypes.ImageHistory), - oldImageToLayerIDs: make(map[string][]string), - images: []dockertypes.Image{ - { - ID: "busybox", - }, - { - ID: "kubelet", - }, - }, - history: map[string][]dockertypes.ImageHistory{ - "busybox": { - { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - }, - "kubelet": { - { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - }, - expectedCalls: []string{"list_images", "image_history", "image_history"}, - expectedLayers: map[string]*dockertypes.ImageHistory{ - "0123456": { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - "1123456": { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - "baz": { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - "1baz": { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - expectedImageToLayerIDs: map[string][]string{ - "busybox": {"0123456", "baz"}, - "kubelet": {"1123456", "1baz"}, - }, - expectedTotalStorageSize: 1000, - }, - { - // Use cache value - oldLayers: map[string]*dockertypes.ImageHistory{ - "0123456": { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - "baz": { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - }, - oldImageToLayerIDs: map[string][]string{ 
- "busybox": {"0123456", "baz"}, - }, - images: []dockertypes.Image{ - { - ID: "busybox", - }, - { - ID: "kubelet", - }, - }, - history: map[string][]dockertypes.ImageHistory{ - "busybox": { - { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - }, - "kubelet": { - { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - }, - expectedCalls: []string{"list_images", "image_history"}, - expectedLayers: map[string]*dockertypes.ImageHistory{ - "0123456": { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - "1123456": { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - "baz": { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - "1baz": { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - expectedImageToLayerIDs: map[string][]string{ - "busybox": {"0123456", "baz"}, - "kubelet": {"1123456", "1baz"}, - }, - expectedTotalStorageSize: 1000, - }, - { - // Unused cache value - oldLayers: map[string]*dockertypes.ImageHistory{ - "0123456": { - ID: "0123456", - CreatedBy: "foo", - Size: 100, - }, - "baz": { - ID: "", - CreatedBy: "baz", - Size: 300, - }, - }, - oldImageToLayerIDs: map[string][]string{ - "busybox": {"0123456", "baz"}, - }, - images: []dockertypes.Image{ - { - ID: "kubelet", - }, - }, - history: map[string][]dockertypes.ImageHistory{ - "kubelet": { - { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - }, - expectedCalls: []string{"list_images", "image_history"}, - expectedLayers: map[string]*dockertypes.ImageHistory{ - "1123456": { - ID: "1123456", - CreatedBy: "foo", - Size: 200, - }, - "1baz": { - ID: "", - CreatedBy: "1baz", - Size: 400, - }, - }, - expectedImageToLayerIDs: map[string][]string{ - "kubelet": {"1123456", "1baz"}, - }, - expectedTotalStorageSize: 600, - }, - } { - fakeDockerClient := NewFakeDockerClient().WithVersion("1.2.3", "1.2") - fakeDockerClient.InjectImages(test.images) - fakeDockerClient.InjectImageHistory(test.history) - isp := newImageStatsProvider(fakeDockerClient) - isp.layers = test.oldLayers - isp.imageToLayerIDs = test.oldImageToLayerIDs - st, err := isp.ImageStats() - as := assert.New(t) - as.NoError(err) - as.NoError(fakeDockerClient.AssertCalls(test.expectedCalls)) - as.Equal(test.expectedLayers, isp.layers, "expected %+v, got %+v", test.expectedLayers, isp.layers) - as.Equal(test.expectedImageToLayerIDs, isp.imageToLayerIDs, "expected %+v, got %+v", test.expectedImageToLayerIDs, isp.imageToLayerIDs) - as.Equal(test.expectedTotalStorageSize, st.TotalStorageBytes, "expected %d, got %d", test.expectedTotalStorageSize, st.TotalStorageBytes) - } -} diff --git a/pkg/kubelet/dockertools/labels.go b/pkg/kubelet/dockertools/labels.go deleted file mode 100644 index 148b0c6635c..00000000000 --- a/pkg/kubelet/dockertools/labels.go +++ /dev/null @@ -1,249 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package dockertools - -import ( - "encoding/json" - "strconv" - - "github.com/golang/glog" - "k8s.io/apimachinery/pkg/runtime" - kubetypes "k8s.io/apimachinery/pkg/types" - "k8s.io/kubernetes/pkg/api" - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - "k8s.io/kubernetes/pkg/kubelet/custommetrics" - "k8s.io/kubernetes/pkg/kubelet/types" - "k8s.io/kubernetes/pkg/kubelet/util/format" -) - -// This file contains all docker label related constants and functions, including: -// * label setters and getters -// * label filters (maybe in the future) - -const ( - kubernetesPodDeletionGracePeriodLabel = "io.kubernetes.pod.deletionGracePeriod" - kubernetesPodTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod" - - kubernetesContainerHashLabel = "io.kubernetes.container.hash" - kubernetesContainerRestartCountLabel = "io.kubernetes.container.restartCount" - kubernetesContainerTerminationMessagePathLabel = "io.kubernetes.container.terminationMessagePath" - kubernetesContainerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy" - kubernetesContainerPreStopHandlerLabel = "io.kubernetes.container.preStopHandler" - kubernetesContainerPortsLabel = "io.kubernetes.container.ports" // Added in 1.4 - - // TODO(random-liu): Keep this for old containers, remove this when we drop support for v1.1. - kubernetesPodLabel = "io.kubernetes.pod.data" - - cadvisorPrometheusMetricsLabel = "io.cadvisor.metric.prometheus" -) - -// Container information which has been labelled on each docker container -// TODO(random-liu): The type of Hash should be compliance with kubelet container status. -type labelledContainerInfo struct { - PodName string - PodNamespace string - PodUID kubetypes.UID - PodDeletionGracePeriod *int64 - PodTerminationGracePeriod *int64 - Name string - Hash string - RestartCount int - TerminationMessagePath string - TerminationMessagePolicy v1.TerminationMessagePolicy - PreStopHandler *v1.Handler - Ports []v1.ContainerPort -} - -func newLabels(container *v1.Container, pod *v1.Pod, restartCount int, enableCustomMetrics bool) map[string]string { - labels := map[string]string{} - labels[types.KubernetesPodNameLabel] = pod.Name - labels[types.KubernetesPodNamespaceLabel] = pod.Namespace - labels[types.KubernetesPodUIDLabel] = string(pod.UID) - if pod.DeletionGracePeriodSeconds != nil { - labels[kubernetesPodDeletionGracePeriodLabel] = strconv.FormatInt(*pod.DeletionGracePeriodSeconds, 10) - } - if pod.Spec.TerminationGracePeriodSeconds != nil { - labels[kubernetesPodTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10) - } - - labels[types.KubernetesContainerNameLabel] = container.Name - labels[kubernetesContainerHashLabel] = strconv.FormatUint(kubecontainer.HashContainerLegacy(container), 16) - labels[kubernetesContainerRestartCountLabel] = strconv.Itoa(restartCount) - labels[kubernetesContainerTerminationMessagePathLabel] = container.TerminationMessagePath - labels[kubernetesContainerTerminationMessagePolicyLabel] = string(container.TerminationMessagePolicy) - if container.Lifecycle != nil && container.Lifecycle.PreStop != nil { - // Using json enconding so that the PreStop handler object is readable after writing as a label - rawPreStop, err := json.Marshal(container.Lifecycle.PreStop) - if err != nil { - glog.Errorf("Unable to marshal lifecycle PreStop handler for container %q of pod %q: %v", container.Name, format.Pod(pod), err) - } else { - 
labels[kubernetesContainerPreStopHandlerLabel] = string(rawPreStop) - } - } - if len(container.Ports) > 0 { - rawContainerPorts, err := json.Marshal(container.Ports) - if err != nil { - glog.Errorf("Unable to marshal container ports for container %q for pod %q: %v", container.Name, format.Pod(pod), err) - } else { - labels[kubernetesContainerPortsLabel] = string(rawContainerPorts) - } - } - if enableCustomMetrics { - path, err := custommetrics.GetCAdvisorCustomMetricsDefinitionPath(container) - if path != nil && err == nil { - labels[cadvisorPrometheusMetricsLabel] = *path - } - } - - return labels -} - -func getContainerInfoFromLabel(labels map[string]string) *labelledContainerInfo { - var err error - containerInfo := &labelledContainerInfo{ - PodName: getStringValueFromLabel(labels, types.KubernetesPodNameLabel), - PodNamespace: getStringValueFromLabel(labels, types.KubernetesPodNamespaceLabel), - PodUID: kubetypes.UID(getStringValueFromLabel(labels, types.KubernetesPodUIDLabel)), - Name: getStringValueFromLabel(labels, types.KubernetesContainerNameLabel), - Hash: getStringValueFromLabel(labels, kubernetesContainerHashLabel), - TerminationMessagePath: getStringValueFromLabel(labels, kubernetesContainerTerminationMessagePathLabel), - TerminationMessagePolicy: v1.TerminationMessagePolicy(getStringValueFromLabel(labels, kubernetesContainerTerminationMessagePolicyLabel)), - } - if containerInfo.RestartCount, err = getIntValueFromLabel(labels, kubernetesContainerRestartCountLabel); err != nil { - logError(containerInfo, kubernetesContainerRestartCountLabel, err) - } - if containerInfo.PodDeletionGracePeriod, err = getInt64PointerFromLabel(labels, kubernetesPodDeletionGracePeriodLabel); err != nil { - logError(containerInfo, kubernetesPodDeletionGracePeriodLabel, err) - } - if containerInfo.PodTerminationGracePeriod, err = getInt64PointerFromLabel(labels, kubernetesPodTerminationGracePeriodLabel); err != nil { - logError(containerInfo, kubernetesPodTerminationGracePeriodLabel, err) - } - preStopHandler := &v1.Handler{} - if found, err := getJsonObjectFromLabel(labels, kubernetesContainerPreStopHandlerLabel, preStopHandler); err != nil { - logError(containerInfo, kubernetesContainerPreStopHandlerLabel, err) - } else if found { - containerInfo.PreStopHandler = preStopHandler - } - containerPorts := []v1.ContainerPort{} - if found, err := getJsonObjectFromLabel(labels, kubernetesContainerPortsLabel, &containerPorts); err != nil { - logError(containerInfo, kubernetesContainerPortsLabel, err) - } else if found { - containerInfo.Ports = containerPorts - } - supplyContainerInfoWithOldLabel(labels, containerInfo) - return containerInfo -} - -func getStringValueFromLabel(labels map[string]string, label string) string { - if value, found := labels[label]; found { - return value - } - // Do not report error, because there should be many old containers without label now. - glog.V(3).Infof("Container doesn't have label %s, it may be an old or invalid container", label) - // Return empty string "" for these containers, the caller will get value by other ways. - return "" -} - -func getIntValueFromLabel(labels map[string]string, label string) (int, error) { - if strValue, found := labels[label]; found { - intValue, err := strconv.Atoi(strValue) - if err != nil { - // This really should not happen. Just set value to 0 to handle this abnormal case - return 0, err - } - return intValue, nil - } - // Do not report error, because there should be many old containers without label now. 
- glog.V(3).Infof("Container doesn't have label %s, it may be an old or invalid container", label) - // Just set the value to 0 - return 0, nil -} - -func getInt64PointerFromLabel(labels map[string]string, label string) (*int64, error) { - if strValue, found := labels[label]; found { - int64Value, err := strconv.ParseInt(strValue, 10, 64) - if err != nil { - return nil, err - } - return &int64Value, nil - } - // Because it's normal that a container has no PodDeletionGracePeriod and PodTerminationGracePeriod label, - // don't report any error here. - return nil, nil -} - -// getJsonObjectFromLabel returns a bool value indicating whether an object is found -func getJsonObjectFromLabel(labels map[string]string, label string, value interface{}) (bool, error) { - if strValue, found := labels[label]; found { - err := json.Unmarshal([]byte(strValue), value) - return found, err - } - // Because it's normal that a container has no PreStopHandler label, don't report any error here. - return false, nil -} - -// The label kubernetesPodLabel is added a long time ago (#7421), it serialized the whole v1.Pod to a docker label. -// We want to remove this label because it serialized too much useless information. However kubelet may still work -// with old containers which only have this label for a long time until we completely deprecate the old label. -// Before that to ensure correctness we have to supply information with the old labels when newly added labels -// are not available. -// TODO(random-liu): Remove this function when we can completely remove label kubernetesPodLabel, probably after -// dropping support for v1.1. -func supplyContainerInfoWithOldLabel(labels map[string]string, containerInfo *labelledContainerInfo) { - // Get v1.Pod from old label - var pod *v1.Pod - data, found := labels[kubernetesPodLabel] - if !found { - // Don't report any error here, because it's normal that a container has no pod label, especially - // when we gradually deprecate the old label - return - } - pod = &v1.Pod{} - if err := runtime.DecodeInto(api.Codecs.UniversalDecoder(), []byte(data), pod); err != nil { - // If the pod label can't be parsed, we should report an error - logError(containerInfo, kubernetesPodLabel, err) - return - } - if containerInfo.PodDeletionGracePeriod == nil { - containerInfo.PodDeletionGracePeriod = pod.DeletionGracePeriodSeconds - } - if containerInfo.PodTerminationGracePeriod == nil { - containerInfo.PodTerminationGracePeriod = pod.Spec.TerminationGracePeriodSeconds - } - - // Get v1.Container from v1.Pod - var container *v1.Container - for i := range pod.Spec.Containers { - if pod.Spec.Containers[i].Name == containerInfo.Name { - container = &pod.Spec.Containers[i] - break - } - } - if container == nil { - glog.Errorf("Unable to find container %q in pod %q", containerInfo.Name, format.Pod(pod)) - return - } - if containerInfo.PreStopHandler == nil && container.Lifecycle != nil { - containerInfo.PreStopHandler = container.Lifecycle.PreStop - } -} - -func logError(containerInfo *labelledContainerInfo, label string, err error) { - glog.Errorf("Unable to get %q for container %q of pod %q: %v", label, containerInfo.Name, - kubecontainer.BuildPodFullName(containerInfo.PodName, containerInfo.PodNamespace), err) -} diff --git a/pkg/kubelet/dockertools/labels_test.go b/pkg/kubelet/dockertools/labels_test.go deleted file mode 100644 index 19bbfd18be2..00000000000 --- a/pkg/kubelet/dockertools/labels_test.go +++ /dev/null @@ -1,142 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package dockertools - -import ( - "reflect" - "strconv" - "testing" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/kubernetes/pkg/api/testapi" - "k8s.io/kubernetes/pkg/api/v1" - kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" - "k8s.io/kubernetes/pkg/kubelet/util/format" -) - -func TestLabels(t *testing.T) { - restartCount := 5 - deletionGracePeriod := int64(10) - terminationGracePeriod := int64(10) - lifecycle := &v1.Lifecycle{ - // Left PostStart as nil - PreStop: &v1.Handler{ - Exec: &v1.ExecAction{ - Command: []string{"action1", "action2"}, - }, - HTTPGet: &v1.HTTPGetAction{ - Path: "path", - Host: "host", - Port: intstr.FromInt(8080), - Scheme: "scheme", - }, - TCPSocket: &v1.TCPSocketAction{ - Port: intstr.FromString("80"), - }, - }, - } - containerPorts := []v1.ContainerPort{ - { - Name: "http", - HostPort: 80, - ContainerPort: 8080, - Protocol: v1.ProtocolTCP, - }, - { - Name: "https", - HostPort: 443, - ContainerPort: 6443, - Protocol: v1.ProtocolTCP, - }, - } - container := &v1.Container{ - Name: "test_container", - Ports: containerPorts, - TerminationMessagePath: "/somepath", - Lifecycle: lifecycle, - } - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test_pod", - Namespace: "test_pod_namespace", - UID: "test_pod_uid", - DeletionGracePeriodSeconds: &deletionGracePeriod, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{*container}, - TerminationGracePeriodSeconds: &terminationGracePeriod, - }, - } - expected := &labelledContainerInfo{ - PodName: pod.Name, - PodNamespace: pod.Namespace, - PodUID: pod.UID, - PodDeletionGracePeriod: pod.DeletionGracePeriodSeconds, - PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds, - Name: container.Name, - Hash: strconv.FormatUint(kubecontainer.HashContainerLegacy(container), 16), - RestartCount: restartCount, - TerminationMessagePath: container.TerminationMessagePath, - PreStopHandler: container.Lifecycle.PreStop, - Ports: containerPorts, - } - - // Test whether we can get right information from label - labels := newLabels(container, pod, restartCount, false) - containerInfo := getContainerInfoFromLabel(labels) - if !reflect.DeepEqual(containerInfo, expected) { - t.Errorf("expected %v, got %v", expected, containerInfo) - } - - // Test when DeletionGracePeriodSeconds, TerminationGracePeriodSeconds and Lifecycle are nil, - // the information got from label should also be nil - container.Lifecycle = nil - pod.DeletionGracePeriodSeconds = nil - pod.Spec.TerminationGracePeriodSeconds = nil - expected.PodDeletionGracePeriod = nil - expected.PodTerminationGracePeriod = nil - expected.PreStopHandler = nil - // Because container is changed, the Hash should be updated - expected.Hash = strconv.FormatUint(kubecontainer.HashContainerLegacy(container), 16) - labels = newLabels(container, pod, restartCount, false) - containerInfo = getContainerInfoFromLabel(labels) - if !reflect.DeepEqual(containerInfo, 
expected) { - t.Errorf("expected %v, got %v", expected, containerInfo) - } - - // Test when DeletionGracePeriodSeconds, TerminationGracePeriodSeconds and Lifecycle are nil, - // but the old label kubernetesPodLabel is set, the information got from label should also be set - pod.DeletionGracePeriodSeconds = &deletionGracePeriod - pod.Spec.TerminationGracePeriodSeconds = &terminationGracePeriod - container.Lifecycle = lifecycle - data, err := runtime.Encode(testapi.Default.Codec(), pod) - if err != nil { - t.Fatalf("Failed to encode pod %q into string: %v", format.Pod(pod), err) - } - labels[kubernetesPodLabel] = string(data) - expected.PodDeletionGracePeriod = pod.DeletionGracePeriodSeconds - expected.PodTerminationGracePeriod = pod.Spec.TerminationGracePeriodSeconds - expected.PreStopHandler = container.Lifecycle.PreStop - // Do not update expected.Hash here, because we directly use the labels in last test, so we never - // changed the kubernetesContainerHashLabel in this test, the expected.Hash shouldn't be changed. - containerInfo = getContainerInfoFromLabel(labels) - if !reflect.DeepEqual(containerInfo, expected) { - t.Errorf("expected %v, got %v", expected, containerInfo) - } -} diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index f60e5e89db7..6a519fdc707 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -101,7 +101,6 @@ import ( "k8s.io/kubernetes/pkg/util/mount" nodeutil "k8s.io/kubernetes/pkg/util/node" "k8s.io/kubernetes/pkg/util/oom" - "k8s.io/kubernetes/pkg/util/procfs" "k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates" ) @@ -504,7 +503,6 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub return nil, err } - procFs := procfs.NewProcFS() imageBackOff := flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff) klet.livenessManager = proberesults.NewManager() @@ -541,7 +539,12 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub pluginSettings.LegacyRuntimeHost = nl // rktnetes cannot be run with CRI. - if kubeCfg.ContainerRuntime != "rkt" && kubeCfg.EnableCRI { + // TODO(yujuhong): Remove the EnableCRI field. + if kubeCfg.ContainerRuntime != "rkt" { + kubeCfg.EnableCRI = true + } + + if kubeCfg.EnableCRI { // kubelet defers to the runtime shim to setup networking. Setting // this to nil will prevent it from trying to invoke the plugin. // It's easier to always probe and initialize plugins till cri @@ -621,76 +624,36 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub klet.containerRuntime = runtime klet.runner = runtime } else { - switch kubeCfg.ContainerRuntime { - case "docker": - runtime := dockertools.NewDockerManager( - kubeDeps.DockerClient, - kubecontainer.FilterEventRecorder(kubeDeps.Recorder), - klet.livenessManager, - containerRefManager, - klet.podManager, - machineInfo, - kubeCfg.PodInfraContainerImage, - float32(kubeCfg.RegistryPullQPS), - int(kubeCfg.RegistryBurst), - ContainerLogsDir, - kubeDeps.OSInterface, - klet.networkPlugin, - klet, - klet.httpClient, - dockerExecHandler, - kubeDeps.OOMAdjuster, - procFs, - klet.cpuCFSQuota, - imageBackOff, - kubeCfg.SerializeImagePulls, - kubeCfg.EnableCustomMetrics, - // If using "kubenet", the Kubernetes network plugin that wraps - // CNI's bridge plugin, it knows how to set the hairpin veth flag - // so we tell the container runtime to back away from setting it. 
- // If the kubelet is started with any other plugin we can't be - // sure it handles the hairpin case so we instruct the docker - // runtime to set the flag instead. - klet.hairpinMode == componentconfig.HairpinVeth && kubeCfg.NetworkPluginName != "kubenet", - kubeCfg.SeccompProfileRoot, - kubeDeps.ContainerRuntimeOptions..., - ) - klet.containerRuntime = runtime - klet.runner = kubecontainer.DirectStreamingRunner(runtime) - case "rkt": - // TODO: Include hairpin mode settings in rkt? - conf := &rkt.Config{ - Path: kubeCfg.RktPath, - Stage1Image: kubeCfg.RktStage1Image, - InsecureOptions: "image,ondisk", - } - runtime, err := rkt.New( - kubeCfg.RktAPIEndpoint, - conf, - klet, - kubeDeps.Recorder, - containerRefManager, - klet.podManager, - klet.livenessManager, - klet.httpClient, - klet.networkPlugin, - klet.hairpinMode == componentconfig.HairpinVeth, - utilexec.New(), - kubecontainer.RealOS{}, - imageBackOff, - kubeCfg.SerializeImagePulls, - float32(kubeCfg.RegistryPullQPS), - int(kubeCfg.RegistryBurst), - kubeCfg.RuntimeRequestTimeout.Duration, - ) - if err != nil { - return nil, err - } - klet.containerRuntime = runtime - klet.runner = kubecontainer.DirectStreamingRunner(runtime) - default: - return nil, fmt.Errorf("unsupported container runtime %q specified", kubeCfg.ContainerRuntime) + // TODO: Include hairpin mode settings in rkt? + conf := &rkt.Config{ + Path: kubeCfg.RktPath, + Stage1Image: kubeCfg.RktStage1Image, + InsecureOptions: "image,ondisk", } + runtime, err := rkt.New( + kubeCfg.RktAPIEndpoint, + conf, + klet, + kubeDeps.Recorder, + containerRefManager, + klet.podManager, + klet.livenessManager, + klet.httpClient, + klet.networkPlugin, + klet.hairpinMode == componentconfig.HairpinVeth, + utilexec.New(), + kubecontainer.RealOS{}, + imageBackOff, + kubeCfg.SerializeImagePulls, + float32(kubeCfg.RegistryPullQPS), + int(kubeCfg.RegistryBurst), + kubeCfg.RuntimeRequestTimeout.Duration, + ) + if err != nil { + return nil, err + } + klet.containerRuntime = runtime + klet.runner = kubecontainer.DirectStreamingRunner(runtime) } // TODO: Factor out "StatsProvider" from Kubelet so we don't have a cyclic dependency
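// Illustrative sketch only, assuming simplified stand-in types (Config and
// selectRuntime are hypothetical names, not kubelet APIs): it mirrors the
// runtime-selection flow introduced by the kubelet.go hunk above, where every
// runtime except rkt is forced onto the CRI path and only rkt keeps the
// legacy, non-CRI integration.
package main

import "fmt"

// Config stands in for the relevant fields of the kubelet configuration.
type Config struct {
	ContainerRuntime string
	EnableCRI        bool
}

// selectRuntime models the new control flow: force EnableCRI for any
// runtime other than rkt, then branch on it.
func selectRuntime(cfg *Config) string {
	if cfg.ContainerRuntime != "rkt" {
		cfg.EnableCRI = true
	}
	if cfg.EnableCRI {
		return "CRI-based runtime path"
	}
	return "legacy rkt runtime path"
}

func main() {
	for _, rt := range []string{"docker", "remote", "rkt"} {
		cfg := &Config{ContainerRuntime: rt}
		fmt.Printf("%s -> %s\n", rt, selectRuntime(cfg))
	}
}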