diff --git a/pkg/kubelet/container_gc.go b/pkg/kubelet/container_gc.go new file mode 100644 index 00000000000..ef86a552cb5 --- /dev/null +++ b/pkg/kubelet/container_gc.go @@ -0,0 +1,234 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "fmt" + "sort" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" + "github.com/GoogleCloudPlatform/kubernetes/pkg/types" + "github.com/fsouza/go-dockerclient" + "github.com/golang/glog" +) + +// Specified a policy for garbage collecting containers. +type ContainerGCPolicy struct { + // Minimum age at which a container can be garbage collected, zero for no limit. + MinAge time.Duration + + // Max number of dead containers any single pod (UID, container name) pair is + // allowed to have, less than zero for no limit. + MaxPerPodContainer int + + // Max number of total dead containers, less than zero for no limit. + MaxContainers int +} + +// Manages garbage collection of dead containers. +// +// Implementation is thread-compatible. +type containerGC interface { + // Garbage collect containers. + GarbageCollect() error +} + +// TODO(vmarmol): Preferentially remove pod infra containers. +type realContainerGC struct { + // Docker client to use. + dockerClient dockertools.DockerInterface + + // Policy for garbage collection. + policy ContainerGCPolicy +} + +// New containerGC instance with the specified policy. +func newContainerGC(dockerClient dockertools.DockerInterface, policy ContainerGCPolicy) (containerGC, error) { + if policy.MinAge < 0 { + return nil, fmt.Errorf("invalid minimum garbage collection age: %v", policy.MinAge) + } + + return &realContainerGC{ + dockerClient: dockerClient, + policy: policy, + }, nil +} + +// Internal information kept for containers being considered for GC. +type containerGCInfo struct { + // Docker ID of the container. + id string + + // Docker name of the container. + name string + + // Creation time for the container. + createTime time.Time +} + +// Containers are considered for eviction as units of (UID, container name) pair. +type evictUnit struct { + // UID of the pod. + uid types.UID + + // Name of the container in the pod. + name string +} +type containersByEvictUnit map[evictUnit][]containerGCInfo + +// Returns the number of containers in this map. +func (self containersByEvictUnit) NumContainers() int { + num := 0 + for key := range self { + num += len(self[key]) + } + + return num +} + +// Returns the number of pod in this map. +func (self containersByEvictUnit) NumEvictUnits() int { + return len(self) +} + +// Newest first. +type byCreated []containerGCInfo + +func (a byCreated) Len() int { return len(a) } +func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } + +func (self *realContainerGC) GarbageCollect() error { + // Separate containers by evict units. + evictUnits, unidentifiedContainers, err := self.evictableContainers() + if err != nil { + return err + } + + // Remove unidentified containers. + for _, container := range unidentifiedContainers { + glog.Infof("Removing unidentified dead container %q with ID %q", container.name, container.id) + err = self.dockerClient.RemoveContainer(docker.RemoveContainerOptions{ID: container.id}) + if err != nil { + glog.Warningf("Failed to remove unidentified dead container %q: %v", container.name, err) + } + } + + // Enforce max containers per evict unit. + if self.policy.MaxPerPodContainer >= 0 { + self.enforceMaxContainersPerEvictUnit(evictUnits, self.policy.MaxPerPodContainer) + } + + // Enforce max total number of containers. + if self.policy.MaxContainers >= 0 && evictUnits.NumContainers() > self.policy.MaxContainers { + // Leave an equal number of containers per evict unit (min: 1). + numContainersPerEvictUnit := self.policy.MaxContainers / evictUnits.NumEvictUnits() + if numContainersPerEvictUnit < 1 { + numContainersPerEvictUnit = 1 + } + self.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit) + + // If we still need to evict, evict oldest first. + numContainers := evictUnits.NumContainers() + if numContainers > self.policy.MaxContainers { + flattened := make([]containerGCInfo, 0, numContainers) + for uid := range evictUnits { + flattened = append(flattened, evictUnits[uid]...) + } + sort.Sort(byCreated(flattened)) + + self.removeOldestN(flattened, numContainers-self.policy.MaxContainers) + } + } + + return nil +} + +func (self *realContainerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) { + for uid := range evictUnits { + toRemove := len(evictUnits[uid]) - MaxContainers + + if toRemove > 0 { + evictUnits[uid] = self.removeOldestN(evictUnits[uid], toRemove) + } + } +} + +// Removes the oldest toRemove containers and returns the resulting slice. +func (self *realContainerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo { + // Remove from oldest to newest (last to first). + numToKeep := len(containers) - toRemove + for i := numToKeep; i < len(containers); i++ { + err := self.dockerClient.RemoveContainer(docker.RemoveContainerOptions{ID: containers[i].id}) + if err != nil { + glog.Warningf("Failed to remove dead container %q: %v", containers[i].name, err) + } + } + + // Assume we removed the containers so that we're not too aggressive. + return containers[:numToKeep] +} + +// Get all containers that are evictable. Evictable containers are: not running +// and created more than MinAge ago. +func (self *realContainerGC) evictableContainers() (containersByEvictUnit, []containerGCInfo, error) { + containers, err := dockertools.GetKubeletDockerContainers(self.dockerClient, true) + if err != nil { + return containersByEvictUnit{}, []containerGCInfo{}, err + } + + unidentifiedContainers := make([]containerGCInfo, 0) + evictUnits := make(containersByEvictUnit) + newestGCTime := time.Now().Add(-self.policy.MinAge) + for _, container := range containers { + // Prune out running containers. + data, err := self.dockerClient.InspectContainer(container.ID) + if err != nil { + // Container may have been removed already, skip. + continue + } else if data.State.Running { + continue + } else if newestGCTime.Before(data.Created) { + continue + } + + containerInfo := containerGCInfo{ + id: container.ID, + name: container.Names[0], + createTime: data.Created, + } + + _, uid, name, _, err := dockertools.ParseDockerName(container.Names[0]) + if err != nil { + unidentifiedContainers = append(unidentifiedContainers, containerInfo) + } else { + key := evictUnit{ + uid: uid, + name: name, + } + evictUnits[key] = append(evictUnits[key], containerInfo) + } + } + + // Sort the containers by age. + for uid := range evictUnits { + sort.Sort(byCreated(evictUnits[uid])) + } + + return evictUnits, unidentifiedContainers, nil +} diff --git a/pkg/kubelet/container_gc_test.go b/pkg/kubelet/container_gc_test.go new file mode 100644 index 00000000000..514fe3c98ef --- /dev/null +++ b/pkg/kubelet/container_gc_test.go @@ -0,0 +1,302 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "fmt" + "testing" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" + "github.com/fsouza/go-dockerclient" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestContainerGC(t *testing.T, MinAge time.Duration, MaxPerPodContainer, MaxContainers int) (containerGC, *dockertools.FakeDockerClient) { + fakeDocker := new(dockertools.FakeDockerClient) + gc, err := newContainerGC(fakeDocker, ContainerGCPolicy{ + MinAge: MinAge, + MaxPerPodContainer: MaxPerPodContainer, + MaxContainers: MaxContainers, + }) + require.Nil(t, err) + return gc, fakeDocker +} + +// Makes a stable time object, lower id is earlier time. +func makeTime(id int) time.Time { + return zero.Add(time.Duration(id) * time.Second) +} + +// Makes an API object with the specified Docker ID and pod UID. +func makeAPIContainer(uid, name, dockerID string) docker.APIContainers { + return docker.APIContainers{ + Names: []string{fmt.Sprintf("/k8s_%s_bar_new_%s_42", name, uid)}, + ID: dockerID, + } +} + +// Makes a function that adds to a map a detailed container with the specified properties. +func makeContainerDetail(id string, running bool, created time.Time) func(map[string]*docker.Container) { + return func(m map[string]*docker.Container) { + m[id] = &docker.Container{ + State: docker.State{ + Running: running, + }, + ID: id, + Created: created, + } + } +} + +// Makes a detailed container map from the specified functions. +func makeContainerDetailMap(funcs ...func(map[string]*docker.Container)) map[string]*docker.Container { + m := make(map[string]*docker.Container, len(funcs)) + for _, f := range funcs { + f(m) + } + return m +} + +func TestGarbageCollectZeroMaxContainers(t *testing.T) { + gc, fakeDocker := newTestContainerGC(t, time.Minute, 1, 0) + fakeDocker.ContainerList = []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + } + fakeDocker.ContainerMap = makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + ) + + assert.Nil(t, gc.GarbageCollect()) + assert.Len(t, fakeDocker.Removed, 1) +} + +func TestGarbageCollectNoMaxPerPodContainerLimit(t *testing.T) { + gc, fakeDocker := newTestContainerGC(t, time.Minute, -1, 4) + fakeDocker.ContainerList = []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo1", "POD", "2876"), + makeAPIContainer("foo2", "POD", "3876"), + makeAPIContainer("foo3", "POD", "4876"), + makeAPIContainer("foo4", "POD", "5876"), + } + fakeDocker.ContainerMap = makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + makeContainerDetail("4876", false, makeTime(3)), + makeContainerDetail("5876", false, makeTime(4)), + ) + + assert.Nil(t, gc.GarbageCollect()) + assert.Len(t, fakeDocker.Removed, 1) +} + +func TestGarbageCollectNoMaxLimit(t *testing.T) { + gc, fakeDocker := newTestContainerGC(t, time.Minute, 1, -1) + fakeDocker.ContainerList = []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo1", "POD", "2876"), + makeAPIContainer("foo2", "POD", "3876"), + makeAPIContainer("foo3", "POD", "4876"), + makeAPIContainer("foo4", "POD", "5876"), + } + fakeDocker.ContainerMap = makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(0)), + makeContainerDetail("3876", false, makeTime(0)), + makeContainerDetail("4876", false, makeTime(0)), + makeContainerDetail("5876", false, makeTime(0)), + ) + + assert.Nil(t, gc.GarbageCollect()) + assert.Len(t, fakeDocker.Removed, 0) +} + +func TestGarbageCollect(t *testing.T) { + tests := []struct { + containers []docker.APIContainers + containerDetails map[string]*docker.Container + expectedRemoved []string + }{ + // Don't remove containers started recently. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, time.Now()), + makeContainerDetail("2876", false, time.Now()), + makeContainerDetail("3876", false, time.Now()), + ), + }, + // Remove oldest containers. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + ), + expectedRemoved: []string{"1876"}, + }, + // Only remove non-running containers. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + makeAPIContainer("foo", "POD", "4876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", true, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + makeContainerDetail("4876", false, makeTime(3)), + ), + expectedRemoved: []string{"2876"}, + }, + // Less than maxContainerCount doesn't delete any. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + ), + }, + // maxContainerCount applies per (UID,container) pair. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo", "POD", "3876"), + makeAPIContainer("foo", "bar", "1076"), + makeAPIContainer("foo", "bar", "2076"), + makeAPIContainer("foo", "bar", "3076"), + makeAPIContainer("foo2", "POD", "1176"), + makeAPIContainer("foo2", "POD", "2176"), + makeAPIContainer("foo2", "POD", "3176"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(2)), + makeContainerDetail("1076", false, makeTime(0)), + makeContainerDetail("2076", false, makeTime(1)), + makeContainerDetail("3076", false, makeTime(2)), + makeContainerDetail("1176", false, makeTime(0)), + makeContainerDetail("2176", false, makeTime(1)), + makeContainerDetail("3176", false, makeTime(2)), + ), + expectedRemoved: []string{"1076", "1176", "1876"}, + }, + // Remove non-running unidentified Kubernetes containers. + { + containers: []docker.APIContainers{ + { + // Unidentified Kubernetes container. + Names: []string{"/k8s_unidentified"}, + ID: "1876", + }, + { + // Unidentified (non-running) Kubernetes container. + Names: []string{"/k8s_unidentified"}, + ID: "2876", + }, + makeAPIContainer("foo", "POD", "3876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", true, makeTime(0)), + makeContainerDetail("2876", false, makeTime(0)), + makeContainerDetail("3876", false, makeTime(0)), + ), + expectedRemoved: []string{"2876"}, + }, + // Max limit applied and tries to keep from every pod. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo1", "POD", "3876"), + makeAPIContainer("foo1", "POD", "4876"), + makeAPIContainer("foo2", "POD", "5876"), + makeAPIContainer("foo2", "POD", "6876"), + makeAPIContainer("foo3", "POD", "7876"), + makeAPIContainer("foo3", "POD", "8876"), + makeAPIContainer("foo4", "POD", "9876"), + makeAPIContainer("foo4", "POD", "10876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(0)), + makeContainerDetail("2876", false, makeTime(1)), + makeContainerDetail("3876", false, makeTime(0)), + makeContainerDetail("4876", false, makeTime(1)), + makeContainerDetail("5876", false, makeTime(0)), + makeContainerDetail("6876", false, makeTime(1)), + makeContainerDetail("7876", false, makeTime(0)), + makeContainerDetail("8876", false, makeTime(1)), + makeContainerDetail("9876", false, makeTime(0)), + makeContainerDetail("10876", false, makeTime(1)), + ), + expectedRemoved: []string{"1876", "3876", "5876", "7876", "9876"}, + }, + // If more pods than limit allows, evicts oldest pod. + { + containers: []docker.APIContainers{ + makeAPIContainer("foo", "POD", "1876"), + makeAPIContainer("foo", "POD", "2876"), + makeAPIContainer("foo1", "POD", "3876"), + makeAPIContainer("foo1", "POD", "4876"), + makeAPIContainer("foo2", "POD", "5876"), + makeAPIContainer("foo3", "POD", "6876"), + makeAPIContainer("foo4", "POD", "7876"), + makeAPIContainer("foo5", "POD", "8876"), + makeAPIContainer("foo6", "POD", "9876"), + makeAPIContainer("foo7", "POD", "10876"), + }, + containerDetails: makeContainerDetailMap( + makeContainerDetail("1876", false, makeTime(1)), + makeContainerDetail("2876", false, makeTime(2)), + makeContainerDetail("3876", false, makeTime(1)), + makeContainerDetail("4876", false, makeTime(2)), + makeContainerDetail("5876", false, makeTime(0)), + makeContainerDetail("6876", false, makeTime(1)), + makeContainerDetail("7876", false, makeTime(0)), + makeContainerDetail("8876", false, makeTime(1)), + makeContainerDetail("9876", false, makeTime(2)), + makeContainerDetail("10876", false, makeTime(1)), + ), + expectedRemoved: []string{"1876", "3876", "5876", "7876"}, + }, + } + for i, test := range tests { + t.Logf("Running test case with index %d", i) + gc, fakeDocker := newTestContainerGC(t, time.Hour, 2, 6) + fakeDocker.ContainerList = test.containers + fakeDocker.ContainerMap = test.containerDetails + assert.Nil(t, gc.GarbageCollect()) + verifyStringArrayEqualsAnyOrder(t, fakeDocker.Removed, test.expectedRemoved) + } +}