Merge pull request #45896 from dashpole/disk_pressure_reclaim

Automatic merge from submit-queue

Delete all dead containers and sandboxes when under disk pressure.

This PR modifies the eviction manager to register dead container and sandbox garbage collection as a resource reclaim function for disk.  It also modifies the container GC logic so that containers and sandboxes belonging to pods that are terminated, but not yet deleted, can be removed.

It still does not delete containers younger than minGcAge.  This should prevent nodes from entering a permanently bad state when the entire disk is occupied by pods that are terminated (in the Failed or Succeeded state) but not deleted.

There are two improvements we should consider making in the future:

- Track the disk space and inodes reclaimed by deleting containers.  We currently do not track this, which prevents us from determining whether deleting containers resolved the disk pressure, so we may still evict a pod even when deleting dead containers freed enough space.  (A rough sketch of this idea follows the list.)
- Once we can track reclaimed disk space and inodes, we should consider deleting only as many containers as needed to relieve disk pressure.  This should help avoid deleting a massive number of containers all at once and overwhelming the runtime.
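A rough sketch of the tracking idea in the first bullet, assuming a Linux node and reusing the new `DeleteAllUnusedContainers` entry point; the `statfsFreeBytes` and `reclaimDeadContainers` helpers are illustrative names, not code from this PR:

```go
package sketch

import "syscall"

// statfsFreeBytes returns the available bytes on the filesystem containing
// path (Linux-specific; wraps statfs(2)).
func statfsFreeBytes(path string) (int64, error) {
	var st syscall.Statfs_t
	if err := syscall.Statfs(path, &st); err != nil {
		return 0, err
	}
	return int64(st.Bavail) * int64(st.Bsize), nil
}

// containerDeleter is the subset of ContainerGC this sketch needs.
type containerDeleter interface {
	DeleteAllUnusedContainers() error
}

// reclaimDeadContainers deletes unused containers and estimates the disk
// space freed by sampling free space before and after the call.
func reclaimDeadContainers(gc containerDeleter, rootDir string) (int64, error) {
	before, err := statfsFreeBytes(rootDir)
	if err != nil {
		return 0, err
	}
	if err := gc.DeleteAllUnusedContainers(); err != nil {
		return 0, err
	}
	after, err := statfsFreeBytes(rootDir)
	if err != nil {
		return 0, err
	}
	freed := after - before
	if freed < 0 {
		// Other writers may have consumed space concurrently.
		freed = 0
	}
	return freed, nil
}
```

Sampling free space this way is only an approximation, since other writers can consume or release space concurrently, but it would be enough for the manager to decide whether pod eviction is still necessary.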

/assign @vishh 
cc @derekwaynecarr 

```release-note
Disk Pressure triggers the deletion of terminated containers on the node.
```
Kubernetes Submit Queue 2017-06-03 23:43:46 -07:00 committed by GitHub
commit 3fdf6c3d14
16 changed files with 193 additions and 105 deletions

View File

@ -39,7 +39,15 @@ type ContainerGCPolicy struct {
// Implementation is thread-compatible.
type ContainerGC interface {
// Garbage collect containers.
GarbageCollect(allSourcesReady bool) error
GarbageCollect() error
// Deletes all unused containers, including containers belonging to pods that are terminated but not deleted
DeleteAllUnusedContainers() error
}
// SourcesReadyProvider knows how to determine if configuration sources are ready
type SourcesReadyProvider interface {
// AllReady returns true if the currently configured sources have all been seen.
AllReady() bool
}
// TODO(vmarmol): Preferentially remove pod infra containers.
@ -49,20 +57,28 @@ type realContainerGC struct {
// Policy for garbage collection.
policy ContainerGCPolicy
// sourcesReadyProvider provides the readiness of kubelet configuration sources.
sourcesReadyProvider SourcesReadyProvider
}
// New ContainerGC instance with the specified policy.
func NewContainerGC(runtime Runtime, policy ContainerGCPolicy) (ContainerGC, error) {
func NewContainerGC(runtime Runtime, policy ContainerGCPolicy, sourcesReadyProvider SourcesReadyProvider) (ContainerGC, error) {
if policy.MinAge < 0 {
return nil, fmt.Errorf("invalid minimum garbage collection age: %v", policy.MinAge)
}
return &realContainerGC{
runtime: runtime,
policy: policy,
runtime: runtime,
policy: policy,
sourcesReadyProvider: sourcesReadyProvider,
}, nil
}
func (cgc *realContainerGC) GarbageCollect(allSourcesReady bool) error {
return cgc.runtime.GarbageCollect(cgc.policy, allSourcesReady)
func (cgc *realContainerGC) GarbageCollect() error {
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), false)
}
func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}
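For context, a minimal sketch of how the split interface is intended to be driven, assuming the kubelet keeps its periodic GC loop and the eviction manager calls the aggressive path only under disk pressure; `runPeriodicGC` and `reclaimDiskByDeletingContainers` are illustrative names, not part of the diff:

```go
package sketch

import (
	"log"
	"time"
)

// ContainerGC mirrors the interface above: routine GC plus an aggressive
// "delete everything unused" path for disk-pressure reclaim.
type ContainerGC interface {
	GarbageCollect() error
	DeleteAllUnusedContainers() error
}

// runPeriodicGC mirrors the kubelet's background loop: it only removes
// containers allowed by the GC policy and belonging to deleted pods.
func runPeriodicGC(gc ContainerGC, period time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			if err := gc.GarbageCollect(); err != nil {
				log.Printf("container garbage collection failed: %v", err)
			}
		case <-stop:
			return
		}
	}
}

// reclaimDiskByDeletingContainers mirrors the eviction manager's reclaim path:
// it also removes containers of pods that are terminated but not deleted.
func reclaimDiskByDeletingContainers(gc ContainerGC) error {
	return gc.DeleteAllUnusedContainers()
}
```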

View File

@ -82,8 +82,10 @@ type Runtime interface {
// complete list of pods from all available sources (e.g., apiserver, http,
// file). In this case, garbage collector should refrain itself from aggressive
// behavior such as removing all containers of unrecognized pods (yet).
// If evictNonDeletedPods is set to true, containers and sandboxes belonging to pods
// that are terminated, but not deleted will be evicted. Otherwise, only deleted pods will be GC'd.
// TODO: Revisit this method and make it cleaner.
GarbageCollect(gcPolicy ContainerGCPolicy, allSourcesReady bool) error
GarbageCollect(gcPolicy ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
// Syncs the running pod into the desired pod.
SyncPod(pod *v1.Pod, apiPodStatus v1.PodStatus, podStatus *PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) PodSyncResult
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.

View File

@ -431,7 +431,7 @@ func (f *FakeRuntime) GetPodContainerID(pod *Pod) (ContainerID, error) {
return ContainerID{}, f.Err
}
func (f *FakeRuntime) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool) error {
func (f *FakeRuntime) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool, evictNonDeletedPods bool) error {
f.Lock()
defer f.Unlock()

View File

@ -140,8 +140,8 @@ func (r *Mock) GetPodContainerID(pod *Pod) (ContainerID, error) {
return ContainerID{}, args.Error(0)
}
func (r *Mock) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool) error {
args := r.Called(gcPolicy, ready)
func (r *Mock) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool, evictNonDeletedPods bool) error {
args := r.Called(gcPolicy, ready, evictNonDeletedPods)
return args.Error(0)
}

View File

@ -58,6 +58,8 @@ type managerImpl struct {
killPodFunc KillPodFunc
// the interface that knows how to do image gc
imageGC ImageGC
// the interface that knows how to do container gc
containerGC ContainerGC
// protects access to internal state
sync.RWMutex
// node conditions are the set of conditions present
@ -95,6 +97,7 @@ func NewManager(
config Config,
killPodFunc KillPodFunc,
imageGC ImageGC,
containerGC ContainerGC,
recorder record.EventRecorder,
nodeRef *clientv1.ObjectReference,
clock clock.Clock) (Manager, lifecycle.PodAdmitHandler) {
@ -102,6 +105,7 @@ func NewManager(
clock: clock,
killPodFunc: killPodFunc,
imageGC: imageGC,
containerGC: containerGC,
config: config,
recorder: recorder,
summaryProvider: summaryProvider,
@ -223,8 +227,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
m.dedicatedImageFs = &hasImageFs
m.resourceToRankFunc = buildResourceToRankFunc(hasImageFs)
m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, hasImageFs)
m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
}
activePods := podFunc()

View File

@ -77,17 +77,24 @@ func (m *mockNodeProvider) GetNode() (*v1.Node, error) {
return &m.node, nil
}
// mockImageGC is used to simulate invoking image garbage collection.
type mockImageGC struct {
err error
freed int64
invoked bool
// mockDiskGC is used to simulate invoking image and container garbage collection.
type mockDiskGC struct {
err error
imageBytesFreed int64
imageGCInvoked bool
containerGCInvoked bool
}
// DeleteUnusedImages returns the mocked values.
func (m *mockImageGC) DeleteUnusedImages() (int64, error) {
m.invoked = true
return m.freed, m.err
func (m *mockDiskGC) DeleteUnusedImages() (int64, error) {
m.imageGCInvoked = true
return m.imageBytesFreed, m.err
}
// DeleteAllUnusedContainers returns the mocked value
func (m *mockDiskGC) DeleteAllUnusedContainers() error {
m.containerGCInvoked = true
return m.err
}
func makePodWithMemoryStats(name string, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
@ -194,7 +201,7 @@ func TestMemoryPressure(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGC := &mockImageGC{freed: int64(0), err: nil}
imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -412,7 +419,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGC := &mockImageGC{freed: int64(0), err: nil}
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -440,7 +447,8 @@ func TestDiskPressureNodeFs(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@ -610,7 +618,7 @@ func TestMinReclaim(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGC := &mockImageGC{freed: int64(0), err: nil}
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -633,7 +641,8 @@ func TestMinReclaim(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@ -750,7 +759,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGcFree := resource.MustParse("700Mi")
imageGC := &mockImageGC{freed: imageGcFree.Value(), err: nil}
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -773,7 +782,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@ -801,7 +811,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
}
// verify image gc was invoked
if !imageGC.invoked {
if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
t.Errorf("Manager should have invoked image gc")
}
@ -811,7 +821,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
}
// reset state
imageGC.invoked = false
diskGC.imageGCInvoked = false
diskGC.containerGCInvoked = false
// remove disk pressure
fakeClock.Step(20 * time.Minute)
@ -833,8 +844,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
t.Errorf("Manager should report disk pressure")
}
// ensure image gc was invoked
if !imageGC.invoked {
// ensure disk gc was invoked
if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
t.Errorf("Manager should have invoked image gc")
}
@ -850,8 +861,9 @@ func TestNodeReclaimFuncs(t *testing.T) {
// reduce disk pressure
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
imageGC.invoked = false // reset state
podKiller.pod = nil // reset state
diskGC.imageGCInvoked = false // reset state
diskGC.containerGCInvoked = false // reset state
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider)
// we should have disk pressure (because transition period not yet met)
@ -860,7 +872,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
}
// no image gc should have occurred
if imageGC.invoked {
if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
t.Errorf("Manager chose to perform image gc when it was not neeed")
}
@ -872,8 +884,9 @@ func TestNodeReclaimFuncs(t *testing.T) {
// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
imageGC.invoked = false // reset state
podKiller.pod = nil // reset state
diskGC.imageGCInvoked = false // reset state
diskGC.containerGCInvoked = false // reset state
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider)
// we should not have disk pressure (because transition period met)
@ -882,7 +895,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
}
// no image gc should have occurred
if imageGC.invoked {
if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
t.Errorf("Manager chose to perform image gc when it was not neeed")
}
@ -943,7 +956,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGC := &mockImageGC{freed: int64(0), err: nil}
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -971,7 +984,8 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@ -1144,7 +1158,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGC := &mockImageGC{freed: int64(0), err: nil}
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &clientv1.ObjectReference{
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
}
@ -1174,7 +1188,8 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@ -1276,7 +1291,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
imageGC := &mockImageGC{freed: int64(0), err: nil}
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -1296,7 +1311,8 @@ func TestAllocatableMemoryPressure(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,

View File

@ -1019,32 +1019,34 @@ func PodIsEvicted(podStatus v1.PodStatus) bool {
}
// buildResourceToNodeReclaimFuncs returns reclaim functions associated with resources.
func buildResourceToNodeReclaimFuncs(imageGC ImageGC, withImageFs bool) map[v1.ResourceName]nodeReclaimFuncs {
func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool) map[v1.ResourceName]nodeReclaimFuncs {
resourceToReclaimFunc := map[v1.ResourceName]nodeReclaimFuncs{}
// usage of an imagefs is optional
if withImageFs {
// with an imagefs, nodefs pressure should just delete logs
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs()}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteLogs()}
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
} else {
// without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs and nodefs share a common device, they share common reclaim functions
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
}
return resourceToReclaimFunc
}
// deleteLogs will delete logs to free up disk pressure.
func deleteLogs() nodeReclaimFunc {
// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
// TODO: not yet supported.
return resource.NewQuantity(int64(0), resource.BinarySI), nil
glog.Infof("eviction manager: attempting to delete unused containers")
err := containerGC.DeleteAllUnusedContainers()
// Calculating bytes freed is not yet supported.
return resource.NewQuantity(int64(0), resource.BinarySI), err
}
}
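To make the wiring above concrete, here is a simplified, hypothetical sketch of how a manager could walk the `nodeReclaimFuncs` registered for a starved resource before falling back to evicting pods; `runReclaimFuncs` and the `pressureRelieved` callback are assumptions for illustration, not the eviction manager's actual code:

```go
package sketch

// nodeReclaimFunc frees node-level resources and reports the bytes it freed.
// As noted in the PR description, freed bytes are not yet tracked, so the
// container-GC reclaim function currently reports zero.
type nodeReclaimFunc func() (freedBytes int64, err error)

// runReclaimFuncs runs the reclaim functions in order (terminated containers
// first, then unused images, matching buildResourceToNodeReclaimFuncs) and
// reports whether pressure was relieved without evicting any pod.
func runReclaimFuncs(funcs []nodeReclaimFunc, pressureRelieved func() bool) (bool, error) {
	for _, reclaim := range funcs {
		if _, err := reclaim(); err != nil {
			return false, err
		}
		if pressureRelieved() {
			return true, nil
		}
	}
	return false, nil
}
```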

View File

@ -81,6 +81,13 @@ type ImageGC interface {
DeleteUnusedImages() (int64, error)
}
// ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
// It returns an error if it is unsuccessful.
DeleteAllUnusedContainers() error
}
// KillPodFunc kills a pod.
// The pod status is updated, and then it is killed with the specified grace period.
// This function must block until either the pod is killed or an error is encountered.

View File

@ -671,7 +671,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
klet.updatePodCIDR(kubeCfg.PodCIDR)
// setup containerGC
containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy)
containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
if err != nil {
return nil, err
}
@ -761,7 +761,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()
// setup eviction manager
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.imageManager, kubeDeps.Recorder, nodeRef, klet.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
klet.evictionManager = evictionManager
klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
@ -1184,7 +1184,7 @@ func (kl *Kubelet) setupDataDirs() error {
func (kl *Kubelet) StartGarbageCollection() {
loggedContainerGCFailure := false
go wait.Until(func() {
if err := kl.containerGC.GarbageCollect(kl.sourcesReady.AllReady()); err != nil {
if err := kl.containerGC.GarbageCollect(); err != nil {
glog.Errorf("Container garbage collection failed: %v", err)
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
loggedContainerGCFailure = true

View File

@ -270,7 +270,7 @@ func newTestKubeletWithImageList(
Namespace: "",
}
// setup eviction manager
evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.imageManager, fakeRecorder, nodeRef, kubelet.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock)
kubelet.evictionManager = evictionManager
kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

View File

@ -209,7 +209,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
}
// evict all containers that are evictable
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
// Separate containers by evict units.
evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
if err != nil {
@ -219,7 +219,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
// Remove deleted pod containers if all sources are ready.
if allSourcesReady {
for key, unit := range evictUnits {
if cgc.isPodDeleted(key.uid) {
if cgc.isPodDeleted(key.uid) || evictNonDeletedPods {
cgc.removeOldestN(unit, len(unit)) // Remove all.
delete(evictUnits, key)
}
@ -261,7 +261,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
// 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes() error {
func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
@ -307,7 +307,7 @@ func (cgc *containerGC) evictSandboxes() error {
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.isPodDeleted(podUID) {
if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
@ -367,14 +367,14 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contains no containers.
// * removes evictable sandboxes.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
// Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady); err != nil {
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
return err
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(); err != nil {
if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
return err
}
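A condensed restatement of the per-pod rule in `evictContainers` above (`evictSandboxes` applies the same `isPodDeleted || evictNonDeletedPods` test without the sources-ready gate); `shouldRemoveAllForPod` and its `podDeleted` argument are illustrative stand-ins for `cgc.isPodDeleted(key.uid)`:

```go
package sketch

// shouldRemoveAllForPod condenses the per-pod eligibility rule: once all
// configuration sources are ready, containers of deleted pods are always
// removable; on the eviction path (evictNonDeletedPods == true), pods that
// are terminated but not yet deleted become removable as well.
func shouldRemoveAllForPod(allSourcesReady, evictNonDeletedPods, podDeleted bool) bool {
	if !allSourcesReady {
		// Without the complete pod list the collector cannot tell which pods
		// are merely unknown yet, so it refrains from aggressive removal.
		return false
	}
	return podDeleted || evictNonDeletedPods
}
```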

View File

@ -62,27 +62,30 @@ func TestSandboxGC(t *testing.T) {
}
for c, test := range []struct {
description string // description of the test case
sandboxes []sandboxTemplate // templates of sandboxes
containers []containerTemplate // templates of containers
minAge time.Duration // sandboxMinGCAge
remain []int // template indexes of remaining sandboxes
description string // description of the test case
sandboxes []sandboxTemplate // templates of sandboxes
containers []containerTemplate // templates of containers
minAge time.Duration // sandboxMinGCAge
remain []int // template indexes of remaining sandboxes
evictNonDeletedPods bool
}{
{
description: "notready sandboxes without containers for deleted pods should be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
},
containers: []containerTemplate{},
remain: []int{},
containers: []containerTemplate{},
remain: []int{},
evictNonDeletedPods: false,
},
{
description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, 0),
},
containers: []containerTemplate{},
remain: []int{0},
containers: []containerTemplate{},
remain: []int{0},
evictNonDeletedPods: false,
},
{
description: "sandboxes for existing pods should not be garbage collected.",
@ -90,8 +93,19 @@ func TestSandboxGC(t *testing.T) {
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
},
containers: []containerTemplate{},
remain: []int{0, 1},
containers: []containerTemplate{},
remain: []int{0, 1},
evictNonDeletedPods: false,
},
{
description: "non-running sandboxes for existing pods should be garbage collected if evictNonDeletedPods is set.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictNonDeletedPods: true,
},
{
description: "sandbox with containers should not be garbage collected.",
@ -101,7 +115,8 @@ func TestSandboxGC(t *testing.T) {
containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0},
remain: []int{0},
evictNonDeletedPods: false,
},
{
description: "multiple sandboxes should be handled properly.",
@ -120,7 +135,8 @@ func TestSandboxGC(t *testing.T) {
containers: []containerTemplate{
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 2},
remain: []int{0, 2},
evictNonDeletedPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
@ -129,7 +145,7 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
err := m.containerGC.evictSandboxes()
err := m.containerGC.evictSandboxes(test.evictNonDeletedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
@ -165,18 +181,20 @@ func TestContainerGC(t *testing.T) {
defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
for c, test := range []struct {
description string // description of the test case
containers []containerTemplate // templates of containers
policy *kubecontainer.ContainerGCPolicy // container gc policy
remain []int // template indexes of remaining containers
description string // description of the test case
containers []containerTemplate // templates of containers
policy *kubecontainer.ContainerGCPolicy // container gc policy
remain []int // template indexes of remaining containers
evictNonDeletedPods bool
}{
{
description: "all containers should be removed when max container limit is 0",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
evictNonDeletedPods: false,
},
{
description: "max containers should be complied when no max per pod container limit is set",
@ -187,8 +205,9 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
evictNonDeletedPods: false,
},
{
description: "no containers should be removed if both max container and per pod container limits are not set",
@ -197,8 +216,9 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
},
{
description: "recently started containers should not be removed",
@ -207,7 +227,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
},
{
description: "oldest containers should be removed when per pod container limit exceeded",
@ -216,7 +237,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
remain: []int{0, 1},
evictNonDeletedPods: false,
},
{
description: "running containers should not be removed",
@ -225,7 +247,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{0, 1, 2},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
},
{
description: "no containers should be removed when limits are not exceeded",
@ -233,7 +256,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
remain: []int{0, 1},
evictNonDeletedPods: false,
},
{
description: "max container count should apply per (UID, container) pair",
@ -248,7 +272,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 3, 4, 6, 7},
remain: []int{0, 1, 3, 4, 6, 7},
evictNonDeletedPods: false,
},
{
description: "max limit should apply and try to keep from every pod",
@ -264,7 +289,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8},
remain: []int{0, 2, 4, 6, 8},
evictNonDeletedPods: false,
},
{
description: "oldest pods should be removed if limit exceeded",
@ -280,7 +306,21 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8, 9},
remain: []int{0, 2, 4, 6, 8, 9},
evictNonDeletedPods: false,
},
{
description: "all non-running containers should be removed when evictNonDeletedPods is set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{5},
evictNonDeletedPods: true,
},
{
description: "containers for deleted pods should be removed",
@ -292,7 +332,8 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
@ -302,7 +343,7 @@ func TestContainerGC(t *testing.T) {
if test.policy == nil {
test.policy = &defaultGCPolicy
}
err := m.containerGC.evictContainers(*test.policy, true)
err := m.containerGC.evictContainers(*test.policy, true, test.evictNonDeletedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)

View File

@ -877,8 +877,8 @@ func (m *kubeGenericRuntimeManager) GetNetNS(_ kubecontainer.ContainerID) (strin
}
// GarbageCollect removes dead containers using the specified container gc policy.
func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady)
func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
}
// GetPodContainerID gets pod sandbox ID

View File

@ -1955,7 +1955,7 @@ func (r *Runtime) getPodSystemdServiceFiles() ([]os.FileInfo, error) {
// - If the number of containers exceeds gcPolicy.MaxContainers,
// then containers whose ages are older than gcPolicy.minAge will
// be removed.
func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, _ bool) error {
var errlist []error
var totalInactiveContainers int
var inactivePods []*rktapi.Pod

View File

@ -1831,7 +1831,8 @@ func TestGarbageCollect(t *testing.T) {
}
allSourcesReady := true
err := rkt.GarbageCollect(tt.gcPolicy, allSourcesReady)
evictNonDeletedPods := false
err := rkt.GarbageCollect(tt.gcPolicy, allSourcesReady, evictNonDeletedPods)
assert.NoError(t, err, testCaseHint)
sort.Sort(sortedStringList(tt.expectedCommands))

View File

@ -126,7 +126,7 @@ func TestRunOnce(t *testing.T) {
fakeKillPodFunc := func(pod *v1.Pod, podStatus v1.PodStatus, gracePeriodOverride *int64) error {
return nil
}
evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, nil, kb.recorder, nodeRef, kb.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock)
kb.evictionManager = evictionManager
kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)