From 4c17c09a8f08cf82707020379e86611145bcfe31 Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Sat, 14 Mar 2015 08:36:41 -0700 Subject: [PATCH 1/2] Separate GC policy into containerGC struct. The policy today takes a min GC age, max dead containers per pod, and max containers overall. When GC is called, only dead containers created more than min GC age ago are considered. The policy tries to keep one dead instance of every pod's containers. --- pkg/kubelet/container_gc.go | 234 ++++++++++++++++++++++++ pkg/kubelet/container_gc_test.go | 302 +++++++++++++++++++++++++++++++ 2 files changed, 536 insertions(+) create mode 100644 pkg/kubelet/container_gc.go create mode 100644 pkg/kubelet/container_gc_test.go diff --git a/pkg/kubelet/container_gc.go b/pkg/kubelet/container_gc.go new file mode 100644 index 00000000000..ef86a552cb5 --- /dev/null +++ b/pkg/kubelet/container_gc.go @@ -0,0 +1,234 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "fmt" + "sort" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" + "github.com/GoogleCloudPlatform/kubernetes/pkg/types" + "github.com/fsouza/go-dockerclient" + "github.com/golang/glog" +) + +// Specified a policy for garbage collecting containers. +type ContainerGCPolicy struct { + // Minimum age at which a container can be garbage collected, zero for no limit. + MinAge time.Duration + + // Max number of dead containers any single pod (UID, container name) pair is + // allowed to have, less than zero for no limit. + MaxPerPodContainer int + + // Max number of total dead containers, less than zero for no limit. + MaxContainers int +} + +// Manages garbage collection of dead containers. +// +// Implementation is thread-compatible. +type containerGC interface { + // Garbage collect containers. + GarbageCollect() error +} + +// TODO(vmarmol): Preferentially remove pod infra containers. +type realContainerGC struct { + // Docker client to use. + dockerClient dockertools.DockerInterface + + // Policy for garbage collection. + policy ContainerGCPolicy +} + +// New containerGC instance with the specified policy. +func newContainerGC(dockerClient dockertools.DockerInterface, policy ContainerGCPolicy) (containerGC, error) { + if policy.MinAge < 0 { + return nil, fmt.Errorf("invalid minimum garbage collection age: %v", policy.MinAge) + } + + return &realContainerGC{ + dockerClient: dockerClient, + policy: policy, + }, nil +} + +// Internal information kept for containers being considered for GC. +type containerGCInfo struct { + // Docker ID of the container. + id string + + // Docker name of the container. + name string + + // Creation time for the container. + createTime time.Time +} + +// Containers are considered for eviction as units of (UID, container name) pair. +type evictUnit struct { + // UID of the pod. + uid types.UID + + // Name of the container in the pod. + name string +} +type containersByEvictUnit map[evictUnit][]containerGCInfo + +// Returns the number of containers in this map. +func (self containersByEvictUnit) NumContainers() int { + num := 0 + for key := range self { + num += len(self[key]) + } + + return num +} + +// Returns the number of pod in this map. +func (self containersByEvictUnit) NumEvictUnits() int { + return len(self) +} + +// Newest first. +type byCreated []containerGCInfo + +func (a byCreated) Len() int { return len(a) } +func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } + +func (self *realContainerGC) GarbageCollect() error { + // Separate containers by evict units. + evictUnits, unidentifiedContainers, err := self.evictableContainers() + if err != nil { + return err + } + + // Remove unidentified containers. + for _, container := range unidentifiedContainers { + glog.Infof("Removing unidentified dead container %q with ID %q", container.name, container.id) + err = self.dockerClient.RemoveContainer(docker.RemoveContainerOptions{ID: container.id}) + if err != nil { + glog.Warningf("Failed to remove unidentified dead container %q: %v", container.name, err) + } + } + + // Enforce max containers per evict unit. + if self.policy.MaxPerPodContainer >= 0 { + self.enforceMaxContainersPerEvictUnit(evictUnits, self.policy.MaxPerPodContainer) + } + + // Enforce max total number of containers. + if self.policy.MaxContainers >= 0 && evictUnits.NumContainers() > self.policy.MaxContainers { + // Leave an equal number of containers per evict unit (min: 1). + numContainersPerEvictUnit := self.policy.MaxContainers / evictUnits.NumEvictUnits() + if numContainersPerEvictUnit < 1 { + numContainersPerEvictUnit = 1 + } + self.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit) + + // If we still need to evict, evict oldest first. + numContainers := evictUnits.NumContainers() + if numContainers > self.policy.MaxContainers { + flattened := make([]containerGCInfo, 0, numContainers) + for uid := range evictUnits { + flattened = append(flattened, evictUnits[uid]...) + } + sort.Sort(byCreated(flattened)) + + self.removeOldestN(flattened, numContainers-self.policy.MaxContainers) + } + } + + return nil +} + +func (self *realContainerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) { + for uid := range evictUnits { + toRemove := len(evictUnits[uid]) - MaxContainers + + if toRemove > 0 { + evictUnits[uid] = self.removeOldestN(evictUnits[uid], toRemove) + } + } +} + +// Removes the oldest toRemove containers and returns the resulting slice. +func (self *realContainerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo { + // Remove from oldest to newest (last to first). + numToKeep := len(containers) - toRemove + for i := numToKeep; i < len(containers); i++ { + err := self.dockerClient.RemoveContainer(docker.RemoveContainerOptions{ID: containers[i].id}) + if err != nil { + glog.Warningf("Failed to remove dead container %q: %v", containers[i].name, err) + } + } + + // Assume we removed the containers so that we're not too aggressive. + return containers[:numToKeep] +} + +// Get all containers that are evictable. Evictable containers are: not running +// and created more than MinAge ago. +func (self *realContainerGC) evictableContainers() (containersByEvictUnit, []containerGCInfo, error) { + containers, err := dockertools.GetKubeletDockerContainers(self.dockerClient, true) + if err != nil { + return containersByEvictUnit{}, []containerGCInfo{}, err + } + + unidentifiedContainers := make([]containerGCInfo, 0) + evictUnits := make(containersByEvictUnit) + newestGCTime := time.Now().Add(-self.policy.MinAge) + for _, container := range containers { + // Prune out running containers. + data, err := self.dockerClient.InspectContainer(container.ID) + if err != nil { + // Container may have been removed already, skip. + continue + } else if data.State.Running { + continue + } else if newestGCTime.Before(data.Created) { + continue + } + + containerInfo := containerGCInfo{ + id: container.ID, + name: container.Names[0], + createTime: data.Created, + } + + _, uid, name, _, err := dockertools.ParseDockerName(container.Names[0]) + if err != nil { + unidentifiedContainers = append(unidentifiedContainers, containerInfo) + } else { + key := evictUnit{ + uid: uid, + name: name, + } + evictUnits[key] = append(evictUnits[key], containerInfo) + } + } + + // Sort the containers by age. + for uid := range evictUnits { + sort.Sort(byCreated(evictUnits[uid])) + } + + return evictUnits, unidentifiedContainers, nil +} diff --git a/pkg/kubelet/container_gc_test.go b/pkg/kubelet/container_gc_test.go new file mode 100644 index 00000000000..514fe3c98ef --- /dev/null +++ b/pkg/kubelet/container_gc_test.go @@ -0,0 +1,302 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "fmt" + "testing" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" + "github.com/fsouza/go-dockerclient" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestContainerGC(t *testing.T, MinAge time.Duration, MaxPerPodContainer, MaxContainers int) (containerGC, *dockertools.FakeDockerClient) { + fakeDocker := new(dockertools.FakeDockerClient) + gc, err := newContainerGC(fakeDocker, ContainerGCPolicy{ + MinAge: MinAge, + MaxPerPodContainer: MaxPerPodContainer, + MaxContainers: MaxContainers, + }) + require.Nil(t, err) + return gc, fakeDocker +} + +// Makes a stable time object, lower id is earlier time. +func makeTime(id int) time.Time { + return zero.Add(time.Duration(id) * time.Second) +} + +// Makes an API object with the specified Docker ID and pod UID. +func makeAPIContainer(uid, name, dockerID string) docker.APIContainers { + return docker.APIContainers{ + Names: []string{fmt.Sprintf("/k8s_%s_bar_new_%s_42", name, uid)}, + ID: dockerID, + } +} + +// Makes a function that adds to a map a detailed container with the specified properties. +func makeContainerDetail(id string, running bool, created time.Time) func(map[string]*docker.Container) { + return func(m map[string]*docker.Container) { + m[id] = &docker.Container{ + State: docker.State{ + Running: running, + }, + ID: id, + Created: created, + } + } +} + +// Makes a detailed container map from the specified functions. +func makeContainerDetailMap(funcs ...func(map[string]*docker.Container)) map[string]*docker.Container { + m := make(map[string]*docker.Container, len(funcs)) + for _, f := range funcs { + f(m) + } + return m +} + +func TestGarbageCollectZeroMaxContainers(t *testing.T) { + gc, fakeDocker := newTestContainerGC(t, time.Minute, 1, 0) + fakeDocker.ContainerList = []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + } + fakeDocker.ContainerMap = makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + ) + + assert.Nil(t, gc.GarbageCollect()) + assert.Len(t, fakeDocker.Removed, 1) +} + +func TestGarbageCollectNoMaxPerPodContainerLimit(t *testing.T) { + gc, fakeDocker := newTestContainerGC(t, time.Minute, -1, 4) + fakeDocker.ContainerList = []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo1", "POD", "2876"), + makeAPIContainer("foo2", "POD", "3876"), + makeAPIContainer("foo3", "POD", "4876"), + makeAPIContainer("foo4", "POD", "5876"), + } + fakeDocker.ContainerMap = makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + makeContainerDetail("4876", false, makeTime(3)), + makeContainerDetail("5876", false, makeTime(4)), + ) + + assert.Nil(t, gc.GarbageCollect()) + assert.Len(t, fakeDocker.Removed, 1) +} + +func TestGarbageCollectNoMaxLimit(t *testing.T) { + gc, fakeDocker := newTestContainerGC(t, time.Minute, 1, -1) + fakeDocker.ContainerList = []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo1", "POD", "2876"), + makeAPIContainer("foo2", "POD", "3876"), + makeAPIContainer("foo3", "POD", "4876"), + makeAPIContainer("foo4", "POD", "5876"), + } + fakeDocker.ContainerMap = makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(0)), + makeContainerDetail("3876", false, makeTime(0)), + makeContainerDetail("4876", false, makeTime(0)), + makeContainerDetail("5876", false, makeTime(0)), + ) + + assert.Nil(t, gc.GarbageCollect()) + assert.Len(t, fakeDocker.Removed, 0) +} + +func TestGarbageCollect(t *testing.T) { + tests := []struct { + containers []docker.APIContainers + containerDetails map[string]*docker.Container + expectedRemoved []string + }{ + // Don't remove containers started recently. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, time.Now()), + makeContainerDetail("2876", false, time.Now()), + makeContainerDetail("3876", false, time.Now()), + ), + }, + // Remove oldest containers. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + ), + expectedRemoved: []string{"1876"}, + }, + // Only remove non-running containers. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + makeAPIContainer("foo", "POD", "4876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", true, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + makeContainerDetail("4876", false, makeTime(3)), + ), + expectedRemoved: []string{"2876"}, + }, + // Less than maxContainerCount doesn't delete any. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + ), + }, + // maxContainerCount applies per (UID,container) pair. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + makeAPIContainer("foo", "bar", "1076"), + makeAPIContainer("foo", "bar", "2076"), + makeAPIContainer("foo", "bar", "3076"), + makeAPIContainer("foo2", "POD", "1176"), + makeAPIContainer("foo2", "POD", "2176"), + makeAPIContainer("foo2", "POD", "3176"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + makeContainerDetail("1076", false, makeTime(0)), + makeContainerDetail("2076", false, makeTime(1)), + makeContainerDetail("3076", false, makeTime(2)), + makeContainerDetail("1176", false, makeTime(0)), + makeContainerDetail("2176", false, makeTime(1)), + makeContainerDetail("3176", false, makeTime(2)), + ), + expectedRemoved: []string{"1076", "1176", "1876"}, + }, + // Remove non-running unidentified Kubernetes containers. + { + containers: []docker.APIContainers{ + { + // Unidentified Kubernetes container. + Names: []string{"/k8s_unidentified"}, + ID: "1876", + }, + { + // Unidentified (non-running) Kubernetes container. + Names: []string{"/k8s_unidentified"}, + ID: "2876", + }, + makeAPIContainer("foo", "POD", "3876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", true, makeTime(0)), + makeContainerDetail("2876", false, makeTime(0)), + makeContainerDetail("3876", false, makeTime(0)), + ), + expectedRemoved: []string{"2876"}, + }, + // Max limit applied and tries to keep from every pod. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo1", "POD", "3876"), + makeAPIContainer("foo1", "POD", "4876"), + makeAPIContainer("foo2", "POD", "5876"), + makeAPIContainer("foo2", "POD", "6876"), + makeAPIContainer("foo3", "POD", "7876"), + makeAPIContainer("foo3", "POD", "8876"), + makeAPIContainer("foo4", "POD", "9876"), + makeAPIContainer("foo4", "POD", "10876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(0)), + makeContainerDetail("4876", false, makeTime(1)), + makeContainerDetail("5876", false, makeTime(0)), + makeContainerDetail("6876", false, makeTime(1)), + makeContainerDetail("7876", false, makeTime(0)), + makeContainerDetail("8876", false, makeTime(1)), + makeContainerDetail("9876", false, makeTime(0)), + makeContainerDetail("10876", false, makeTime(1)), + ), + expectedRemoved: []string{"1876", "3876", "5876", "7876", "9876"}, + }, + // If more pods than limit allows, evicts oldest pod. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo1", "POD", "3876"), + makeAPIContainer("foo1", "POD", "4876"), + makeAPIContainer("foo2", "POD", "5876"), + makeAPIContainer("foo3", "POD", "6876"), + makeAPIContainer("foo4", "POD", "7876"), + makeAPIContainer("foo5", "POD", "8876"), + makeAPIContainer("foo6", "POD", "9876"), + makeAPIContainer("foo7", "POD", "10876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(1)), + makeContainerDetail("2876", false, makeTime(2)), + makeContainerDetail("3876", false, makeTime(1)), + makeContainerDetail("4876", false, makeTime(2)), + makeContainerDetail("5876", false, makeTime(0)), + makeContainerDetail("6876", false, makeTime(1)), + makeContainerDetail("7876", false, makeTime(0)), + makeContainerDetail("8876", false, makeTime(1)), + makeContainerDetail("9876", false, makeTime(2)), + makeContainerDetail("10876", false, makeTime(1)), + ), + expectedRemoved: []string{"1876", "3876", "5876", "7876"}, + }, + } + for i, test := range tests { + t.Logf("Running test case with index %d", i) + gc, fakeDocker := newTestContainerGC(t, time.Hour, 2, 6) + fakeDocker.ContainerList = test.containers + fakeDocker.ContainerMap = test.containerDetails + assert.Nil(t, gc.GarbageCollect()) + verifyStringArrayEqualsAnyOrder(t, fakeDocker.Removed, test.expectedRemoved) + } +} From d1ed571e28ac4a5302fec88e2d5e37ac028332f9 Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Sat, 14 Mar 2015 10:13:20 -0700 Subject: [PATCH 2/2] Use containerGC in the Kubelet. New policy default is 100 containers max. Fixes #5457. --- cmd/kubelet/app/server.go | 21 ++- pkg/kubelet/kubelet.go | 122 ++---------- pkg/kubelet/kubelet_test.go | 360 +----------------------------------- 3 files changed, 30 insertions(+), 473 deletions(-) diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 14fa2860a9c..09be1aff6af 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -65,6 +65,7 @@ type KubeletServer struct { RunOnce bool EnableDebuggingHandlers bool MinimumGCAge time.Duration + MaxPerPodContainerCount int MaxContainerCount int AuthPath string CadvisorPort uint @@ -92,7 +93,8 @@ func NewKubeletServer() *KubeletServer { RegistryBurst: 10, EnableDebuggingHandlers: true, MinimumGCAge: 1 * time.Minute, - MaxContainerCount: 5, + MaxPerPodContainerCount: 5, + MaxContainerCount: 100, CadvisorPort: 4194, OOMScoreAdj: -900, MasterServiceNamespace: api.NamespaceDefault, @@ -120,7 +122,8 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { fs.BoolVar(&s.RunOnce, "runonce", s.RunOnce, "If true, exit after spawning pods from local manifests or remote urls. Exclusive with --api_servers, and --enable-server") fs.BoolVar(&s.EnableDebuggingHandlers, "enable_debugging_handlers", s.EnableDebuggingHandlers, "Enables server endpoints for log collection and local running of containers and commands") fs.DurationVar(&s.MinimumGCAge, "minimum_container_ttl_duration", s.MinimumGCAge, "Minimum age for a finished container before it is garbage collected. Examples: '300ms', '10s' or '2h45m'") - fs.IntVar(&s.MaxContainerCount, "maximum_dead_containers_per_container", s.MaxContainerCount, "Maximum number of old instances of a container to retain per container. Each container takes up some disk space. Default: 5.") + fs.IntVar(&s.MaxPerPodContainerCount, "maximum_dead_containers_per_container", s.MaxPerPodContainerCount, "Maximum number of old instances of a container to retain per container. Each container takes up some disk space. Default: 5.") + fs.IntVar(&s.MaxContainerCount, "maximum_dead_containers", s.MaxContainerCount, "Maximum number of old instances of a containers to retain globally. Each container takes up some disk space. Default: 100.") fs.StringVar(&s.AuthPath, "auth_path", s.AuthPath, "Path to .kubernetes_auth file, specifying how to authenticate to API server.") fs.UintVar(&s.CadvisorPort, "cadvisor_port", s.CadvisorPort, "The port of the localhost cAdvisor endpoint") fs.IntVar(&s.OOMScoreAdj, "oom_score_adj", s.OOMScoreAdj, "The oom_score_adj value for kubelet process. Values must be within the range [-1000, 1000]") @@ -170,6 +173,7 @@ func (s *KubeletServer) Run(_ []string) error { RegistryPullQPS: s.RegistryPullQPS, RegistryBurst: s.RegistryBurst, MinimumGCAge: s.MinimumGCAge, + MaxPerPodContainerCount: s.MaxPerPodContainerCount, MaxContainerCount: s.MaxContainerCount, ClusterDomain: s.ClusterDomain, ClusterDNS: s.ClusterDNS, @@ -260,7 +264,8 @@ func SimpleRunKubelet(client *client.Client, StatusUpdateFrequency: 3 * time.Second, SyncFrequency: 3 * time.Second, MinimumGCAge: 10 * time.Second, - MaxContainerCount: 5, + MaxPerPodContainerCount: 5, + MaxContainerCount: 100, MasterServiceNamespace: masterServiceNamespace, VolumePlugins: volumePlugins, TLSOptions: tlsOptions, @@ -359,6 +364,7 @@ type KubeletConfig struct { RegistryPullQPS float64 RegistryBurst int MinimumGCAge time.Duration + MaxPerPodContainerCount int MaxContainerCount int ClusterDomain string ClusterDNS util.IP @@ -386,6 +392,12 @@ func createAndInitKubelet(kc *KubeletConfig, pc *config.PodConfig) (*kubelet.Kub kubeClient = kc.KubeClient } + gcPolicy := kubelet.ContainerGCPolicy{ + MinAge: kc.MinimumGCAge, + MaxPerPodContainer: kc.MaxPerPodContainerCount, + MaxContainers: kc.MaxContainerCount, + } + k, err := kubelet.NewMainKubelet( kc.Hostname, kc.DockerClient, @@ -395,8 +407,7 @@ func createAndInitKubelet(kc *KubeletConfig, pc *config.PodConfig) (*kubelet.Kub kc.SyncFrequency, float32(kc.RegistryPullQPS), kc.RegistryBurst, - kc.MinimumGCAge, - kc.MaxContainerCount, + gcPolicy, pc.SeenAllSources, kc.ClusterDomain, net.IP(kc.ClusterDNS), diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 833826a739d..8bbcbcad2b7 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -111,8 +111,7 @@ func NewMainKubelet( resyncInterval time.Duration, pullQPS float32, pullBurst int, - minimumGCAge time.Duration, - maxContainerCount int, + containerGCPolicy ContainerGCPolicy, sourcesReady SourcesReadyFn, clusterDomain string, clusterDNS net.IP, @@ -128,9 +127,7 @@ func NewMainKubelet( if resyncInterval <= 0 { return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval) } - if minimumGCAge <= 0 { - return nil, fmt.Errorf("invalid minimum GC age %d", minimumGCAge) - } + dockerClient = metrics.NewInstrumentedDockerInterface(dockerClient) // Wait for the Docker daemon to be up (with a timeout). waitStart := time.Now() @@ -164,7 +161,11 @@ func NewMainKubelet( } serviceLister := &cache.StoreToServiceLister{serviceStore} - dockerClient = metrics.NewInstrumentedDockerInterface(dockerClient) + containerGC, err := newContainerGC(dockerClient, containerGCPolicy) + if err != nil { + return nil, err + } + klet := &Kubelet{ hostname: hostname, dockerClient: dockerClient, @@ -178,8 +179,6 @@ func NewMainKubelet( httpClient: &http.Client{}, pullQPS: pullQPS, pullBurst: pullBurst, - minimumGCAge: minimumGCAge, - maxContainerCount: maxContainerCount, sourcesReady: sourcesReady, clusterDomain: clusterDomain, clusterDNS: clusterDNS, @@ -190,6 +189,7 @@ func NewMainKubelet( streamingConnectionIdleTimeout: streamingConnectionIdleTimeout, recorder: recorder, cadvisor: cadvisorInterface, + containerGC: containerGC, } dockerCache, err := dockertools.NewDockerCache(dockerClient) @@ -268,10 +268,6 @@ type Kubelet struct { // cAdvisor used for container information. cadvisor cadvisor.Interface - // Optional, minimum age required for garbage collection. If zero, no limit. - minimumGCAge time.Duration - maxContainerCount int - // If non-empty, use this for container DNS search. clusterDomain string @@ -302,6 +298,9 @@ type Kubelet struct { // A mirror pod manager which provides helper functions. mirrorManager mirrorManager + + // Policy for handling garbage collection of dead containers. + containerGC containerGC } // getRootDir returns the full path to the directory under which kubelet can @@ -443,109 +442,14 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) { return pods, nil } -type ByCreated []*docker.Container - -func (a ByCreated) Len() int { return len(a) } -func (a ByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ByCreated) Less(i, j int) bool { return a[i].Created.After(a[j].Created) } - -// TODO: these removals are racy, we should make dockerclient threadsafe across List/Inspect transactions. -func (kl *Kubelet) purgeOldest(ids []string) error { - dockerData := []*docker.Container{} - for _, id := range ids { - data, err := kl.dockerClient.InspectContainer(id) - if err != nil { - return err - } - if !data.State.Running && (time.Now().Sub(data.State.FinishedAt) > kl.minimumGCAge) { - dockerData = append(dockerData, data) - } - } - sort.Sort(ByCreated(dockerData)) - if len(dockerData) <= kl.maxContainerCount { - return nil - } - dockerData = dockerData[kl.maxContainerCount:] - for _, data := range dockerData { - if err := kl.dockerClient.RemoveContainer(docker.RemoveContainerOptions{ID: data.ID}); err != nil { - return err - } - } - - return nil -} - func (kl *Kubelet) GarbageCollectLoop() { util.Forever(func() { - if err := kl.GarbageCollectContainers(); err != nil { - glog.Errorf("Garbage collect failed: %v", err) + if err := kl.containerGC.GarbageCollect(); err != nil { + glog.Errorf("Container garbage collect failed: %v", err) } }, time.Minute*1) } -// TODO: Also enforce a maximum total number of containers. -func (kl *Kubelet) GarbageCollectContainers() error { - if kl.maxContainerCount == 0 { - return nil - } - containers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, true) - if err != nil { - return err - } - - type unidentifiedContainer struct { - // Docker ID. - id string - - // Docker container name - name string - } - - unidentifiedContainers := make([]unidentifiedContainer, 0) - uidToIDMap := map[string][]string{} - for _, container := range containers { - _, uid, name, _, err := dockertools.ParseDockerName(container.Names[0]) - if err != nil { - unidentifiedContainers = append(unidentifiedContainers, unidentifiedContainer{ - id: container.ID, - name: container.Names[0], - }) - continue - } - uidName := string(uid) + "." + name - uidToIDMap[uidName] = append(uidToIDMap[uidName], container.ID) - } - - // Remove all non-running unidentified containers. - for _, container := range unidentifiedContainers { - data, err := kl.dockerClient.InspectContainer(container.id) - if err != nil { - return err - } - if data.State.Running { - continue - } - - glog.Infof("Removing unidentified dead container %q with ID %q", container.name, container.id) - err = kl.dockerClient.RemoveContainer(docker.RemoveContainerOptions{ID: container.id}) - if err != nil { - return err - } - } - - // Evict dead containers according to our policies. - for _, list := range uidToIDMap { - if len(list) <= kl.maxContainerCount { - continue - } - if err := kl.purgeOldest(list); err != nil { - return err - } - } - - return nil -} - func (kl *Kubelet) getPodStatusFromCache(podFullName string) (api.PodStatus, bool) { kl.podStatusesLock.RLock() defer kl.podStatusesLock.RUnlock() diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 89fecc75952..ed26deedaa2 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -139,7 +139,7 @@ func verifyStringArrayEqualsAnyOrder(t *testing.T, actual, expected []string) { } } if !found { - t.Errorf("Expected element %s not found in %#v", exp, actual) + t.Errorf("Expected element %q not found in %#v", exp, actual) } } } @@ -1692,364 +1692,6 @@ func TestSyncPodEventHandlerFails(t *testing.T) { } } -func TestKubeletGarbageCollection(t *testing.T) { - tests := []struct { - containers []docker.APIContainers - containerDetails map[string]*docker.Container - expectedRemoved []string - }{ - // Remove oldest containers. - { - containers: []docker.APIContainers{ - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "1876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "2876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "3876", - }, - }, - containerDetails: map[string]*docker.Container{ - "1876": { - State: docker.State{ - Running: false, - }, - ID: "1876", - Created: time.Now(), - }, - }, - expectedRemoved: []string{"1876"}, - }, - // Only remove non-running containers. - { - containers: []docker.APIContainers{ - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "1876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "2876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "3876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "4876", - }, - }, - containerDetails: map[string]*docker.Container{ - "1876": { - State: docker.State{ - Running: true, - }, - ID: "1876", - Created: time.Now(), - }, - "2876": { - State: docker.State{ - Running: false, - }, - ID: "2876", - Created: time.Now(), - }, - }, - expectedRemoved: []string{"2876"}, - }, - // Less than maxContainerCount doesn't delete any. - { - containers: []docker.APIContainers{ - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "1876", - }, - }, - }, - // maxContainerCount applies per container.. - { - containers: []docker.APIContainers{ - { - // pod infra container - Names: []string{"/k8s_POD_foo2_new_.beefbeef_40"}, - ID: "1706", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo2_new_.beefbeef_40"}, - ID: "2706", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo2_new_.beefbeef_40"}, - ID: "3706", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "1876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "2876", - }, - { - // pod infra container - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "3876", - }, - }, - containerDetails: map[string]*docker.Container{ - "1706": { - State: docker.State{ - Running: false, - }, - ID: "1706", - Created: time.Now(), - }, - "1876": { - State: docker.State{ - Running: false, - }, - ID: "1876", - Created: time.Now(), - }, - }, - expectedRemoved: []string{"1706", "1876"}, - }, - // Remove non-running unidentified Kubernetes containers. - { - containers: []docker.APIContainers{ - { - // Unidentified Kubernetes container. - Names: []string{"/k8s_unidentified"}, - ID: "1876", - }, - { - // Unidentified (non-running) Kubernetes container. - Names: []string{"/k8s_unidentified"}, - ID: "2309", - }, - { - // Regular Kubernetes container. - Names: []string{"/k8s_POD_foo_new_.deadbeef_42"}, - ID: "3876", - }, - }, - containerDetails: map[string]*docker.Container{ - "1876": { - State: docker.State{ - Running: false, - }, - ID: "1876", - Created: time.Now(), - }, - "2309": { - State: docker.State{ - Running: true, - }, - ID: "2309", - Created: time.Now(), - }, - }, - expectedRemoved: []string{"1876"}, - }, - } - for _, test := range tests { - testKubelet := newTestKubelet(t) - kubelet := testKubelet.kubelet - fakeDocker := testKubelet.fakeDocker - kubelet.maxContainerCount = 2 - fakeDocker.ContainerList = test.containers - fakeDocker.ContainerMap = test.containerDetails - fakeDocker.Container = &docker.Container{ID: "error", Created: time.Now()} - err := kubelet.GarbageCollectContainers() - if err != nil { - t.Errorf("unexpected error: %v", err) - } - verifyStringArrayEqualsAnyOrder(t, test.expectedRemoved, fakeDocker.Removed) - } -} - -func TestPurgeOldest(t *testing.T) { - created := time.Now() - tests := []struct { - ids []string - containerDetails map[string]*docker.Container - expectedRemoved []string - }{ - { - ids: []string{"1", "2", "3", "4", "5"}, - containerDetails: map[string]*docker.Container{ - "1": { - State: docker.State{ - Running: true, - }, - ID: "1", - Created: created, - }, - "2": { - State: docker.State{ - Running: false, - }, - ID: "2", - Created: created.Add(time.Second), - }, - "3": { - State: docker.State{ - Running: false, - }, - ID: "3", - Created: created.Add(time.Second), - }, - "4": { - State: docker.State{ - Running: false, - }, - ID: "4", - Created: created.Add(time.Second), - }, - "5": { - State: docker.State{ - Running: false, - }, - ID: "5", - Created: created.Add(time.Second), - }, - }, - }, - { - ids: []string{"1", "2", "3", "4", "5", "6"}, - containerDetails: map[string]*docker.Container{ - "1": { - State: docker.State{ - Running: false, - }, - ID: "1", - Created: created.Add(time.Second), - }, - "2": { - State: docker.State{ - Running: false, - }, - ID: "2", - Created: created.Add(time.Millisecond), - }, - "3": { - State: docker.State{ - Running: false, - }, - ID: "3", - Created: created.Add(time.Second), - }, - "4": { - State: docker.State{ - Running: false, - }, - ID: "4", - Created: created.Add(time.Second), - }, - "5": { - State: docker.State{ - Running: false, - }, - ID: "5", - Created: created.Add(time.Second), - }, - "6": { - State: docker.State{ - Running: false, - }, - ID: "6", - Created: created.Add(time.Second), - }, - }, - expectedRemoved: []string{"2"}, - }, - { - ids: []string{"1", "2", "3", "4", "5", "6", "7"}, - containerDetails: map[string]*docker.Container{ - "1": { - State: docker.State{ - Running: false, - }, - ID: "1", - Created: created.Add(time.Second), - }, - "2": { - State: docker.State{ - Running: false, - }, - ID: "2", - Created: created.Add(time.Millisecond), - }, - "3": { - State: docker.State{ - Running: false, - }, - ID: "3", - Created: created.Add(time.Second), - }, - "4": { - State: docker.State{ - Running: false, - }, - ID: "4", - Created: created.Add(time.Second), - }, - "5": { - State: docker.State{ - Running: false, - }, - ID: "5", - Created: created.Add(time.Second), - }, - "6": { - State: docker.State{ - Running: false, - }, - ID: "6", - Created: created.Add(time.Microsecond), - }, - "7": { - State: docker.State{ - Running: false, - }, - ID: "7", - Created: created.Add(time.Second), - }, - }, - expectedRemoved: []string{"2", "6"}, - }, - } - for _, test := range tests { - testKubelet := newTestKubelet(t) - kubelet := testKubelet.kubelet - fakeDocker := testKubelet.fakeDocker - kubelet.maxContainerCount = 5 - fakeDocker.ContainerMap = test.containerDetails - kubelet.purgeOldest(test.ids) - if !reflect.DeepEqual(fakeDocker.Removed, test.expectedRemoved) { - t.Errorf("expected: %v, got: %v", test.expectedRemoved, fakeDocker.Removed) - } - } -} - func TestSyncPodsWithPullPolicy(t *testing.T) { testKubelet := newTestKubelet(t) kubelet := testKubelet.kubelet