diff --git a/pkg/kubelet/container/fake_runtime.go b/pkg/kubelet/container/fake_runtime.go index 2a119ab20f0..ce20e334936 100644 --- a/pkg/kubelet/container/fake_runtime.go +++ b/pkg/kubelet/container/fake_runtime.go @@ -304,3 +304,11 @@ func (f *FakeRuntime) PortForward(pod *Pod, port uint16, stream io.ReadWriteClos f.CalledFunctions = append(f.CalledFunctions, "PortForward") return f.Err } + +func (f *FakeRuntime) GarbageCollect(maxPerPodContainer, maxContainers int, minAge time.Duration) error { + f.Lock() + defer f.Unlock() + + f.CalledFunctions = append(f.CalledFunctions, "GarbageCollect") + return f.Err +} diff --git a/pkg/kubelet/dockertools/container_gc.go b/pkg/kubelet/dockertools/container_gc.go new file mode 100644 index 00000000000..fa1da6327dd --- /dev/null +++ b/pkg/kubelet/dockertools/container_gc.go @@ -0,0 +1,232 @@ +/* +Copyright 2014 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package dockertools + +import ( + "fmt" + "os" + "path" + "path/filepath" + "sort" + "time" + + docker "github.com/fsouza/go-dockerclient" + "github.com/golang/glog" + "k8s.io/kubernetes/pkg/types" +) + +type containerGC struct { + client DockerInterface + containerLogsDir string +} + +func NewContainerGC(client DockerInterface, containerLogsDir string) *containerGC { + return &containerGC{client: client, containerLogsDir: containerLogsDir} +} + +// Internal information kept for containers being considered for GC. +type containerGCInfo struct { + // Docker ID of the container. + id string + + // Docker name of the container. + name string + + // Creation time for the container. + createTime time.Time + + // Full pod name, including namespace in the format `namespace_podName`. + // This comes from dockertools.ParseDockerName(...) + podNameWithNamespace string + + // Container name in pod + containerName string +} + +// Containers are considered for eviction as units of (UID, container name) pair. +type evictUnit struct { + // UID of the pod. + uid types.UID + + // Name of the container in the pod. + name string +} + +type containersByEvictUnit map[evictUnit][]containerGCInfo + +// Returns the number of containers in this map. +func (cu containersByEvictUnit) NumContainers() int { + num := 0 + for key := range cu { + num += len(cu[key]) + } + + return num +} + +// Returns the number of pod in this map. +func (cu containersByEvictUnit) NumEvictUnits() int { + return len(cu) +} + +// Newest first. +type byCreated []containerGCInfo + +func (a byCreated) Len() int { return len(a) } +func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } + +func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) { + for uid := range evictUnits { + toRemove := len(evictUnits[uid]) - MaxContainers + + if toRemove > 0 { + evictUnits[uid] = cgc.removeOldestN(evictUnits[uid], toRemove) + } + } +} + +// Removes the oldest toRemove containers and returns the resulting slice. +func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo { + // Remove from oldest to newest (last to first). + numToKeep := len(containers) - toRemove + for i := numToKeep; i < len(containers); i++ { + err := cgc.client.RemoveContainer(docker.RemoveContainerOptions{ID: containers[i].id, RemoveVolumes: true}) + if err != nil { + glog.Warningf("Failed to remove dead container %q: %v", containers[i].name, err) + } + symlinkPath := LogSymlink(cgc.containerLogsDir, containers[i].podNameWithNamespace, containers[i].containerName, containers[i].id) + err = os.Remove(symlinkPath) + if err != nil && !os.IsNotExist(err) { + glog.Warningf("Failed to remove container %q log symlink %q: %v", containers[i].name, symlinkPath, err) + } + } + + // Assume we removed the containers so that we're not too aggressive. + return containers[:numToKeep] +} + +// Get all containers that are evictable. Evictable containers are: not running +// and created more than MinAge ago. +func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, []containerGCInfo, error) { + containers, err := GetKubeletDockerContainers(cgc.client, true) + if err != nil { + return containersByEvictUnit{}, []containerGCInfo{}, err + } + + unidentifiedContainers := make([]containerGCInfo, 0) + evictUnits := make(containersByEvictUnit) + newestGCTime := time.Now().Add(-minAge) + for _, container := range containers { + // Prune out running containers. + data, err := cgc.client.InspectContainer(container.ID) + if err != nil { + // Container may have been removed already, skip. + continue + } else if data.State.Running { + continue + } else if newestGCTime.Before(data.Created) { + continue + } + + containerInfo := containerGCInfo{ + id: container.ID, + name: container.Names[0], + createTime: data.Created, + } + + containerName, _, err := ParseDockerName(container.Names[0]) + + if err != nil { + unidentifiedContainers = append(unidentifiedContainers, containerInfo) + } else { + key := evictUnit{ + uid: containerName.PodUID, + name: containerName.ContainerName, + } + containerInfo.podNameWithNamespace = containerName.PodFullName + containerInfo.containerName = containerName.ContainerName + evictUnits[key] = append(evictUnits[key], containerInfo) + } + } + + // Sort the containers by age. + for uid := range evictUnits { + sort.Sort(byCreated(evictUnits[uid])) + } + + return evictUnits, unidentifiedContainers, nil +} + +// Garbage collection of dead containers +func (cgc *containerGC) GarbageCollect(maxPerPodContainer, maxContainers int, minAge time.Duration) error { + // Separate containers by evict units. + evictUnits, unidentifiedContainers, err := cgc.evictableContainers(minAge) + if err != nil { + return err + } + + // Remove unidentified containers. + for _, container := range unidentifiedContainers { + glog.Infof("Removing unidentified dead container %q with ID %q", container.name, container.id) + err = cgc.client.RemoveContainer(docker.RemoveContainerOptions{ID: container.id, RemoveVolumes: true}) + if err != nil { + glog.Warningf("Failed to remove unidentified dead container %q: %v", container.name, err) + } + } + + // Enforce max containers per evict unit. + if maxPerPodContainer >= 0 { + cgc.enforceMaxContainersPerEvictUnit(evictUnits, maxPerPodContainer) + } + + // Enforce max total number of containers. + if maxContainers >= 0 && evictUnits.NumContainers() > maxContainers { + // Leave an equal number of containers per evict unit (min: 1). + numContainersPerEvictUnit := maxContainers / evictUnits.NumEvictUnits() + if numContainersPerEvictUnit < 1 { + numContainersPerEvictUnit = 1 + } + cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit) + + // If we still need to evict, evict oldest first. + numContainers := evictUnits.NumContainers() + if numContainers > maxContainers { + flattened := make([]containerGCInfo, 0, numContainers) + for uid := range evictUnits { + flattened = append(flattened, evictUnits[uid]...) + } + sort.Sort(byCreated(flattened)) + + cgc.removeOldestN(flattened, numContainers-maxContainers) + } + } + + // Remove dead symlinks - should only happen on upgrade + // from a k8s version without proper log symlink cleanup + logSymlinks, _ := filepath.Glob(path.Join(cgc.containerLogsDir, fmt.Sprintf("*.%s", LogSuffix))) + for _, logSymlink := range logSymlinks { + if _, err = os.Stat(logSymlink); os.IsNotExist(err) { + err = os.Remove(logSymlink) + if err != nil { + glog.Warningf("Failed to remove container log dead symlink %q: %v", logSymlink, err) + } + } + } + + return nil +} diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index bc8f4b11c83..bfbdf3ef581 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -139,6 +139,9 @@ type DockerManager struct { // If true, enforce container cpu limits with CFS quota support cpuCFSQuota bool + + // Container GC manager + containerGC *containerGC } func NewDockerManager( @@ -214,6 +217,7 @@ func NewDockerManager( } dm.runner = lifecycle.NewHandlerRunner(httpClient, dm, dm) dm.imagePuller = kubecontainer.NewImagePuller(recorder, dm, imageBackOff) + dm.containerGC = NewContainerGC(client, containerLogsDir) return dm } @@ -2019,3 +2023,8 @@ func (dm *DockerManager) GetNetNs(containerID kubecontainer.ContainerID) (string netnsPath := fmt.Sprintf(DockerNetnsFmt, inspectResult.State.Pid) return netnsPath, nil } + +// Garbage collection of dead containers +func (dm *DockerManager) GarbageCollect(maxPerPodContainer, maxContainers int, minAge time.Duration) error { + return dm.containerGC.GarbageCollect(maxPerPodContainer, maxContainers, minAge) +} diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index 4fb510dd232..b1f825e23dd 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -1083,7 +1083,7 @@ func (r *Runtime) GetContainerLogs(pod *api.Pod, containerID kubecontainer.Conta // GarbageCollect collects the pods/containers. // TODO(yifan): Enforce the gc policy, also, it would be better if we can // just GC kubernetes pods. -func (r *Runtime) GarbageCollect() error { +func (r *runtime) GarbageCollect(maxPerPodContainer, maxContainers int, minAge time.Duration) error { if err := exec.Command("systemctl", "reset-failed").Run(); err != nil { glog.Errorf("rkt: Failed to reset failed systemd services: %v", err) }