/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kuberuntime

import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"time"

	"k8s.io/apimachinery/pkg/types"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/sets"
	internalapi "k8s.io/cri-api/pkg/apis"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
	"k8s.io/klog"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// containerGC is the manager of garbage collection.
type containerGC struct {
	client           internalapi.RuntimeService
	manager          *kubeGenericRuntimeManager
	podStateProvider podStateProvider
}

// newContainerGC creates a new containerGC.
func newContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC {
	return &containerGC{
		client:           client,
		manager:          manager,
		podStateProvider: podStateProvider,
	}
}

// containerGCInfo is the internal information kept for containers being considered for GC.
type containerGCInfo struct {
	// The ID of the container.
	id string
	// The name of the container.
	name string
	// Creation time for the container.
	createTime time.Time
	// If true, the container is in unknown state. Garbage collector should try
	// to stop containers before removal.
	unknown bool
}

// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
type sandboxGCInfo struct {
	// The ID of the sandbox.
	id string
	// Creation time for the sandbox.
	createTime time.Time
	// If true, the sandbox is ready or still has containers.
	active bool
}

// evictUnit is the (pod UID, container name) pair used as the unit of eviction.
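// All dead instances of the same container name within the same pod are grouped
// into one evict unit; the same container name in a different pod forms a
// separate unit, so per-pod limits are applied independently.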
type evictUnit struct {
	// UID of the pod.
	uid types.UID
	// Name of the container in the pod.
	name string
}

type containersByEvictUnit map[evictUnit][]containerGCInfo
type sandboxesByPodUID map[types.UID][]sandboxGCInfo

// NumContainers returns the number of containers in this map.
func (cu containersByEvictUnit) NumContainers() int {
	num := 0
	for key := range cu {
		num += len(cu[key])
	}
	return num
}

// NumEvictUnits returns the number of evict units in this map.
func (cu containersByEvictUnit) NumEvictUnits() int {
	return len(cu)
}

// Newest first.
type byCreated []containerGCInfo

func (a byCreated) Len() int           { return len(a) }
func (a byCreated) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }

// Newest first.
type sandboxByCreated []sandboxGCInfo

func (a sandboxByCreated) Len() int           { return len(a) }
func (a sandboxByCreated) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }

// enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
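// For example (illustrative numbers, not policy defaults): with MaxContainers=2,
// an evict unit holding 5 dead containers has its 3 oldest removed and keeps
// only the 2 most recently created ones; units already at or below the limit
// are left untouched.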
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
	for key := range evictUnits {
		toRemove := len(evictUnits[key]) - MaxContainers

		if toRemove > 0 {
			evictUnits[key] = cgc.removeOldestN(evictUnits[key], toRemove)
		}
	}
}

// removeOldestN removes the oldest toRemove containers and returns the resulting slice.
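// For example (illustrative): given a newest-first slice of 5 containers and
// toRemove=2, the entries at indices 4 and 3 (the two oldest) are removed from
// the runtime and the first 3 entries are returned.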
func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
	// Remove from oldest to newest (last to first).
	numToKeep := len(containers) - toRemove
	for i := len(containers) - 1; i >= numToKeep; i-- {
		if containers[i].unknown {
			// Containers in unknown state could still be running, so we should
			// try to stop them before removal.
			id := kubecontainer.ContainerID{
				Type: cgc.manager.runtimeName,
				ID:   containers[i].id,
			}
			message := "Container is in unknown state, try killing it before removal"
			if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
				klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
				continue
			}
		}
		if err := cgc.manager.removeContainer(containers[i].id); err != nil {
			klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
		}
	}

	// Assume we removed the containers so that we're not too aggressive.
	return containers[:numToKeep]
}

// removeOldestNSandboxes removes the oldest toRemove sandboxes, skipping any
// that are still active.
func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
	// Remove from oldest to newest (last to first).
	numToKeep := len(sandboxes) - toRemove
	for i := len(sandboxes) - 1; i >= numToKeep; i-- {
		if !sandboxes[i].active {
			cgc.removeSandbox(sandboxes[i].id)
		}
	}
}

// removeSandbox removes the sandbox by sandboxID.
func (cgc *containerGC) removeSandbox(sandboxID string) {
	klog.V(4).Infof("Removing sandbox %q", sandboxID)
	// In normal cases, kubelet should've already called StopPodSandbox before
	// GC kicks in. To guard against the rare cases where this is not true, try
	// stopping the sandbox before removing it.
	if err := cgc.client.StopPodSandbox(sandboxID); err != nil {
		klog.Errorf("Failed to stop sandbox %q before removing: %v", sandboxID, err)
		return
	}
	if err := cgc.client.RemovePodSandbox(sandboxID); err != nil {
		klog.Errorf("Failed to remove sandbox %q: %v", sandboxID, err)
	}
}

// evictableContainers gets all containers that are evictable. Evictable containers are those that
// are not running and were created more than MinAge ago.
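// For example (illustrative): with MinAge=1m, an exited container created 30s
// ago is skipped on this pass and only becomes a GC candidate later, while one
// created 5m ago is grouped into its evict unit immediately.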
func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
	containers, err := cgc.manager.getKubeletContainers(true)
	if err != nil {
		return containersByEvictUnit{}, err
	}

	evictUnits := make(containersByEvictUnit)
	newestGCTime := time.Now().Add(-minAge)
	for _, container := range containers {
		// Prune out running containers.
		if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
			continue
		}

		createdAt := time.Unix(0, container.CreatedAt)
		if newestGCTime.Before(createdAt) {
			continue
		}

		labeledInfo := getContainerInfoFromLabels(container.Labels)
		containerInfo := containerGCInfo{
			id:         container.Id,
			name:       container.Metadata.Name,
			createTime: createdAt,
			unknown:    container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
		}
		key := evictUnit{
			uid:  labeledInfo.PodUID,
			name: containerInfo.name,
		}
		evictUnits[key] = append(evictUnits[key], containerInfo)
	}

	// Sort the containers by age.
	for uid := range evictUnits {
		sort.Sort(byCreated(evictUnits[uid]))
	}

	return evictUnits, nil
}

// evictContainers evicts all containers that are evictable under the given GC policy.
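// Worked example (illustrative numbers): with MaxPerPodContainer=1 and
// MaxContainers=4, a node holding 3 evict units with 3, 2, and 2 dead
// containers is first trimmed to 1 per unit (3 total); since 3 <= 4 the
// global cap then requires no further eviction. If the per-unit pass still
// left more than MaxContainers, the remainder would be flattened, sorted
// newest first, and the oldest containers overall removed.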
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
	// Separate containers by evict units.
	evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
	if err != nil {
		return err
	}

	// Remove deleted pod containers if all sources are ready.
	if allSourcesReady {
		for key, unit := range evictUnits {
			if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
				cgc.removeOldestN(unit, len(unit)) // Remove all.
				delete(evictUnits, key)
			}
		}
	}

	// Enforce max containers per evict unit.
	if gcPolicy.MaxPerPodContainer >= 0 {
		cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer)
	}

	// Enforce max total number of containers.
	if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers {
		// Leave an equal number of containers per evict unit (min: 1).
		numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits()
		if numContainersPerEvictUnit < 1 {
			numContainersPerEvictUnit = 1
		}
		cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit)

		// If we still need to evict, evict oldest first.
		numContainers := evictUnits.NumContainers()
		if numContainers > gcPolicy.MaxContainers {
			flattened := make([]containerGCInfo, 0, numContainers)
			for key := range evictUnits {
				flattened = append(flattened, evictUnits[key]...)
			}
			sort.Sort(byCreated(flattened))

			cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
		}
	}
	return nil
}

// evictSandboxes removes all evictable sandboxes. An evictable sandbox must
// meet the following requirements:
//   1. not in ready state
//   2. contains no containers
//   3. belongs to a non-existent (i.e., already removed) pod, or is not the
//      most recently created sandbox for the pod.
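// For example (illustrative): for a pod that still exists, only the most
// recently created sandbox is spared from the candidate list, and active
// sandboxes (ready or still holding containers) are never removed; once the
// pod has been deleted, every inactive sandbox, including the newest, becomes
// evictable.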
func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
	containers, err := cgc.manager.getKubeletContainers(true)
	if err != nil {
		return err
	}

	sandboxes, err := cgc.manager.getKubeletSandboxes(true)
	if err != nil {
		return err
	}

	// Collect the PodSandboxId of every container.
	sandboxIDs := sets.NewString()
	for _, container := range containers {
		sandboxIDs.Insert(container.PodSandboxId)
	}

	sandboxesByPod := make(sandboxesByPodUID)
	for _, sandbox := range sandboxes {
		podUID := types.UID(sandbox.Metadata.Uid)
		sandboxInfo := sandboxGCInfo{
			id:         sandbox.Id,
			createTime: time.Unix(0, sandbox.CreatedAt),
		}

		// Set ready sandboxes to be active.
		if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
			sandboxInfo.active = true
		}

		// Set sandboxes that still have containers to be active.
		if sandboxIDs.Has(sandbox.Id) {
			sandboxInfo.active = true
		}

		sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
	}

	// Sort the sandboxes by age.
	for uid := range sandboxesByPod {
		sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
	}

	for podUID, sandboxes := range sandboxesByPod {
		if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
			// Remove all evictable sandboxes if the pod has been removed.
			// Note that the latest dead sandbox is also removed if there is
			// already an active one.
			cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
		} else {
			// Keep latest one if the pod still exists.
			cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
		}
	}
	return nil
}

// evictPodLogsDirectories evicts all evictable pod logs directories. Pod logs directories
// are evictable if there are no corresponding pods.
func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
	osInterface := cgc.manager.osInterface
	if allSourcesReady {
		// Only remove pod logs directories when all sources are ready.
		dirs, err := osInterface.ReadDir(podLogsRootDirectory)
		if err != nil {
			return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
		}
		for _, dir := range dirs {
			name := dir.Name()
			podUID := parsePodUIDFromLogsDirectory(name)
			if !cgc.podStateProvider.IsPodDeleted(podUID) {
				continue
			}
			err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
			if err != nil {
				klog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
			}
		}
	}

	// Remove dead container log symlinks.
	// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
	logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
	for _, logSymlink := range logSymlinks {
		if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
			err := osInterface.Remove(logSymlink)
			if err != nil {
				klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
			}
		}
	}
	return nil
}

// GarbageCollect removes dead containers using the specified container gc policy.
// Note that gc policy is not applied to sandboxes. Sandboxes are only removed when they are
// not ready and contain no containers.
//
// GarbageCollect consists of the following steps:
// * gets evictable containers which are not active and created more than gcPolicy.MinAge ago.
// * removes oldest dead containers for each pod by enforcing gcPolicy.MaxPerPodContainer.
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contain no containers.
// * removes evictable sandboxes.
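//
// A minimal caller-side sketch (illustrative values; in practice the kubelet's
// GC manager supplies the policy from its configuration):
//
//	policy := kubecontainer.ContainerGCPolicy{
//		MinAge:             time.Minute, // ignore containers younger than this
//		MaxPerPodContainer: 1,           // keep at most 1 dead container per (pod, container name)
//		MaxContainers:      -1,          // a negative value disables the global cap
//	}
//	if err := cgc.GarbageCollect(policy, true /* allSourcesReady */, false /* evictTerminatedPods */); err != nil {
//		klog.Errorf("Container garbage collection failed: %v", err)
//	}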
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
	errors := []error{}
	// Remove evictable containers
	if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil {
		errors = append(errors, err)
	}

	// Remove sandboxes with zero containers
	if err := cgc.evictSandboxes(evictTerminatedPods); err != nil {
		errors = append(errors, err)
	}

	// Remove pod sandbox log directory
	if err := cgc.evictPodLogsDirectories(allSourcesReady); err != nil {
		errors = append(errors, err)
	}
	return utilerrors.NewAggregate(errors)
}