From 889afa5e2dc03dd521f97746278098e376a9d794 Mon Sep 17 00:00:00 2001
From: David Ashpole
Date: Mon, 22 May 2017 11:00:22 -0700
Subject: [PATCH] trigger aggressive container garbage collection when under
 disk pressure
---
 pkg/kubelet/container/container_gc.go         |  28 ++++-
 pkg/kubelet/container/runtime.go              |   4 +-
 pkg/kubelet/container/testing/fake_runtime.go |   2 +-
 pkg/kubelet/container/testing/runtime_mock.go |   4 +-
 pkg/kubelet/eviction/eviction_manager.go      |   7 +-
 pkg/kubelet/eviction/eviction_manager_test.go |  78 ++++++++-----
 pkg/kubelet/eviction/helpers.go               |  28 ++---
 pkg/kubelet/eviction/types.go                 |   7 ++
 pkg/kubelet/kubelet.go                        |   6 +-
 pkg/kubelet/kubelet_test.go                   |   2 +-
 pkg/kubelet/kuberuntime/kuberuntime_gc.go     |  14 +--
 .../kuberuntime/kuberuntime_gc_test.go        | 107 ++++++++++++------
 .../kuberuntime/kuberuntime_manager.go        |   4 +-
 pkg/kubelet/rkt/rkt.go                        |   2 +-
 pkg/kubelet/rkt/rkt_test.go                   |   3 +-
 pkg/kubelet/runonce_test.go                   |   2 +-
 16 files changed, 193 insertions(+), 105 deletions(-)

diff --git a/pkg/kubelet/container/container_gc.go b/pkg/kubelet/container/container_gc.go
index a66acc9ef09..be2fac4b996 100644
--- a/pkg/kubelet/container/container_gc.go
+++ b/pkg/kubelet/container/container_gc.go
@@ -39,7 +39,15 @@ type ContainerGCPolicy struct {
 // Implementation is thread-compatible.
 type ContainerGC interface {
 	// Garbage collect containers.
-	GarbageCollect(allSourcesReady bool) error
+	GarbageCollect() error
+	// Deletes all unused containers, including containers belonging to pods that are terminated but not deleted.
+	DeleteAllUnusedContainers() error
+}
+
+// SourcesReadyProvider knows how to determine if configuration sources are ready.
+type SourcesReadyProvider interface {
+	// AllReady returns true if the currently configured sources have all been seen.
+	AllReady() bool
 }
 
 // TODO(vmarmol): Preferentially remove pod infra containers.
@@ -49,20 +57,28 @@ type realContainerGC struct {
 
 	// Policy for garbage collection.
 	policy ContainerGCPolicy
+
+	// sourcesReadyProvider provides the readiness of kubelet configuration sources.
+	sourcesReadyProvider SourcesReadyProvider
 }
 
 // New ContainerGC instance with the specified policy.
-func NewContainerGC(runtime Runtime, policy ContainerGCPolicy) (ContainerGC, error) {
+func NewContainerGC(runtime Runtime, policy ContainerGCPolicy, sourcesReadyProvider SourcesReadyProvider) (ContainerGC, error) {
 	if policy.MinAge < 0 {
 		return nil, fmt.Errorf("invalid minimum garbage collection age: %v", policy.MinAge)
 	}
 
 	return &realContainerGC{
-		runtime: runtime,
-		policy:  policy,
+		runtime:              runtime,
+		policy:               policy,
+		sourcesReadyProvider: sourcesReadyProvider,
 	}, nil
 }
 
-func (cgc *realContainerGC) GarbageCollect(allSourcesReady bool) error {
-	return cgc.runtime.GarbageCollect(cgc.policy, allSourcesReady)
+func (cgc *realContainerGC) GarbageCollect() error {
+	return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), false)
+}
+
+func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
+	return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
 }
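Editor's note: the change above splits container GC into two public entry points that differ only in the aggressiveness flag passed down to the runtime. The following self-contained Go sketch shows that dispatch; `fakeRuntime` and the `ready` callback are illustrative stand-ins for the real runtime and `SourcesReadyProvider`, not part of the patch.

```go
package main

import "fmt"

// ContainerGCPolicy is a trimmed stand-in for the kubelet policy type.
type ContainerGCPolicy struct{ MaxContainers int }

// Runtime models only the GarbageCollect method this patch changes.
type Runtime interface {
	GarbageCollect(policy ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
}

type fakeRuntime struct{}

func (fakeRuntime) GarbageCollect(_ ContainerGCPolicy, ready, evictNonDeleted bool) error {
	fmt.Printf("runtime GC: sourcesReady=%v evictNonDeletedPods=%v\n", ready, evictNonDeleted)
	return nil
}

// realContainerGC shows how the two entry points differ only in the
// evictNonDeletedPods flag they forward.
type realContainerGC struct {
	runtime Runtime
	policy  ContainerGCPolicy
	ready   func() bool // stands in for SourcesReadyProvider.AllReady
}

func (cgc realContainerGC) GarbageCollect() error {
	return cgc.runtime.GarbageCollect(cgc.policy, cgc.ready(), false) // routine, periodic GC
}

func (cgc realContainerGC) DeleteAllUnusedContainers() error {
	return cgc.runtime.GarbageCollect(cgc.policy, cgc.ready(), true) // aggressive, disk-pressure GC
}

func main() {
	gc := realContainerGC{runtime: fakeRuntime{}, ready: func() bool { return true }}
	_ = gc.GarbageCollect()            // kubelet GC loop path
	_ = gc.DeleteAllUnusedContainers() // eviction manager path
}
```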
diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go
index 661a0ecc77d..779cd89d26a 100644
--- a/pkg/kubelet/container/runtime.go
+++ b/pkg/kubelet/container/runtime.go
@@ -82,8 +82,10 @@ type Runtime interface {
 	// complete list of pods from all available sources (e.g., apiserver, http,
 	// file). In this case, the garbage collector should refrain from aggressive
 	// behavior such as removing all containers of unrecognized pods (yet).
+	// If evictNonDeletedPods is set to true, containers and sandboxes belonging to pods
+	// that are terminated, but not deleted, will be evicted. Otherwise, only deleted pods will be GC'd.
 	// TODO: Revisit this method and make it cleaner.
-	GarbageCollect(gcPolicy ContainerGCPolicy, allSourcesReady bool) error
+	GarbageCollect(gcPolicy ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
 	// Syncs the running pod into the desired pod.
 	SyncPod(pod *v1.Pod, apiPodStatus v1.PodStatus, podStatus *PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) PodSyncResult
 	// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
diff --git a/pkg/kubelet/container/testing/fake_runtime.go b/pkg/kubelet/container/testing/fake_runtime.go
index c05af6aeb9e..8966896c24e 100644
--- a/pkg/kubelet/container/testing/fake_runtime.go
+++ b/pkg/kubelet/container/testing/fake_runtime.go
@@ -431,7 +431,7 @@ func (f *FakeRuntime) GetPodContainerID(pod *Pod) (ContainerID, error) {
 	return ContainerID{}, f.Err
 }
 
-func (f *FakeRuntime) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool) error {
+func (f *FakeRuntime) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool, evictNonDeletedPods bool) error {
 	f.Lock()
 	defer f.Unlock()
diff --git a/pkg/kubelet/container/testing/runtime_mock.go b/pkg/kubelet/container/testing/runtime_mock.go
index 2864152062b..c3b12e0a3a2 100644
--- a/pkg/kubelet/container/testing/runtime_mock.go
+++ b/pkg/kubelet/container/testing/runtime_mock.go
@@ -140,8 +140,8 @@ func (r *Mock) GetPodContainerID(pod *Pod) (ContainerID, error) {
 	return ContainerID{}, args.Error(0)
 }
 
-func (r *Mock) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool) error {
-	args := r.Called(gcPolicy, ready)
+func (r *Mock) GarbageCollect(gcPolicy ContainerGCPolicy, ready bool, evictNonDeletedPods bool) error {
+	args := r.Called(gcPolicy, ready, evictNonDeletedPods)
 	return args.Error(0)
 }
diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index 09db40d0075..cdfad9e4e8b 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -58,6 +58,8 @@ type managerImpl struct {
 	killPodFunc KillPodFunc
 	// the interface that knows how to do image gc
 	imageGC ImageGC
+	// the interface that knows how to do container gc
+	containerGC ContainerGC
 	// protects access to internal state
 	sync.RWMutex
 	// node conditions are the set of conditions present
@@ -95,6 +97,7 @@ func NewManager(
 	config Config,
 	killPodFunc KillPodFunc,
 	imageGC ImageGC,
+	containerGC ContainerGC,
 	recorder record.EventRecorder,
 	nodeRef *clientv1.ObjectReference,
 	clock clock.Clock) (Manager, lifecycle.PodAdmitHandler) {
@@ -102,6 +105,7 @@ func NewManager(
 		clock:       clock,
 		killPodFunc: killPodFunc,
 		imageGC:     imageGC,
+		containerGC: containerGC,
 		config:      config,
 		recorder:    recorder,
 		summaryProvider: summaryProvider,
@@ -223,8 +227,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	}
 	m.dedicatedImageFs = &hasImageFs
 	m.resourceToRankFunc = buildResourceToRankFunc(hasImageFs)
-	m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, hasImageFs)
-
+	m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
 	}
 
 	activePods := podFunc()
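Editor's note: the eviction manager now holds both disk-reclaim dependencies. The sketch below shows the intended division of labor in a self-contained form; `manager`, `noopGC`, and `reclaimDisk` are simplified illustrations, not the real `managerImpl`.

```go
package main

import "fmt"

// Stand-ins for the two interfaces in pkg/kubelet/eviction/types.go.
type ImageGC interface {
	DeleteUnusedImages() (int64, error)
}

type ContainerGC interface {
	DeleteAllUnusedContainers() error
}

type manager struct {
	imageGC     ImageGC
	containerGC ContainerGC
}

func (m *manager) reclaimDisk() error {
	// Dead containers go first: removing them drops references to their
	// images, so the image GC pass that follows can reclaim more.
	if err := m.containerGC.DeleteAllUnusedContainers(); err != nil {
		return err
	}
	freed, err := m.imageGC.DeleteUnusedImages()
	if err != nil {
		return err
	}
	fmt.Printf("image gc freed %d bytes\n", freed)
	return nil
}

type noopGC struct{}

func (noopGC) DeleteUnusedImages() (int64, error) { return 0, nil }
func (noopGC) DeleteAllUnusedContainers() error   { return nil }

func main() {
	m := &manager{imageGC: noopGC{}, containerGC: noopGC{}}
	if err := m.reclaimDisk(); err != nil {
		fmt.Println("reclaim failed:", err)
	}
}
```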
diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go
index 27aa00404f5..2177be4b35a 100644
--- a/pkg/kubelet/eviction/eviction_manager_test.go
+++ b/pkg/kubelet/eviction/eviction_manager_test.go
@@ -77,17 +77,24 @@ func (m *mockNodeProvider) GetNode() (*v1.Node, error) {
 	return &m.node, nil
 }
 
-// mockImageGC is used to simulate invoking image garbage collection.
-type mockImageGC struct {
-	err     error
-	freed   int64
-	invoked bool
+// mockDiskGC is used to simulate invoking image and container garbage collection.
+type mockDiskGC struct {
+	err                error
+	imageBytesFreed    int64
+	imageGCInvoked     bool
+	containerGCInvoked bool
 }
 
 // DeleteUnusedImages returns the mocked values.
-func (m *mockImageGC) DeleteUnusedImages() (int64, error) {
-	m.invoked = true
-	return m.freed, m.err
+func (m *mockDiskGC) DeleteUnusedImages() (int64, error) {
+	m.imageGCInvoked = true
+	return m.imageBytesFreed, m.err
+}
+
+// DeleteAllUnusedContainers returns the mocked value.
+func (m *mockDiskGC) DeleteAllUnusedContainers() error {
+	m.containerGCInvoked = true
+	return m.err
 }
 
 func makePodWithMemoryStats(name string, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
@@ -194,7 +201,7 @@ func TestMemoryPressure(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
-	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -412,7 +419,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
-	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -440,7 +447,8 @@ func TestDiskPressureNodeFs(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
@@ -610,7 +618,7 @@ func TestMinReclaim(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
-	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -633,7 +641,8 @@ func TestMinReclaim(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
@@ -750,7 +759,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
 	imageGcFree := resource.MustParse("700Mi")
-	imageGC := &mockImageGC{freed: imageGcFree.Value(), err: nil}
+	diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -773,7 +782,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
@@ -801,7 +811,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	}
 
 	// verify image gc was invoked
-	if !imageGC.invoked {
+	if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
 		t.Errorf("Manager should have invoked image gc")
 	}
 
@@ -811,7 +821,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	}
 
 	// reset state
-	imageGC.invoked = false
+	diskGC.imageGCInvoked = false
+	diskGC.containerGCInvoked = false
 
 	// remove disk pressure
 	fakeClock.Step(20 * time.Minute)
@@ -833,8 +844,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
 		t.Errorf("Manager should report disk pressure")
 	}
 
-	// ensure image gc was invoked
-	if !imageGC.invoked {
+	// ensure disk gc was invoked
+	if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
 		t.Errorf("Manager should have invoked image gc")
 	}
 
@@ -850,8 +861,9 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	// reduce disk pressure
 	fakeClock.Step(1 * time.Minute)
 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
-	imageGC.invoked = false // reset state
-	podKiller.pod = nil     // reset state
+	diskGC.imageGCInvoked = false     // reset state
+	diskGC.containerGCInvoked = false // reset state
+	podKiller.pod = nil               // reset state
 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider)
 
 	// we should have disk pressure (because transition period not yet met)
@@ -860,7 +872,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	}
 
 	// no image gc should have occurred
-	if imageGC.invoked {
+	if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
 		t.Errorf("Manager chose to perform image gc when it was not needed")
 	}
 
@@ -872,8 +884,9 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	// move the clock past transition period to ensure that we stop reporting pressure
 	fakeClock.Step(5 * time.Minute)
 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
-	imageGC.invoked = false // reset state
-	podKiller.pod = nil     // reset state
+	diskGC.imageGCInvoked = false     // reset state
+	diskGC.containerGCInvoked = false // reset state
+	podKiller.pod = nil               // reset state
 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider)
 
 	// we should not have disk pressure (because transition period met)
@@ -882,7 +895,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	}
 
 	// no image gc should have occurred
-	if imageGC.invoked {
+	if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
 		t.Errorf("Manager chose to perform image gc when it was not needed")
 	}
 
@@ -943,7 +956,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
-	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -971,7 +984,8 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
@@ -1144,7 +1158,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
-	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
 	nodeRef := &clientv1.ObjectReference{
 		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
 	}
@@ -1174,7 +1188,8 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
@@ -1276,7 +1291,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")})
-	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -1296,7 +1311,8 @@ func TestAllocatableMemoryPressure(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
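Editor's note: a single `mockDiskGC` double now backs both the `imageGC` and `containerGC` fields of `managerImpl`. A compile-time assertion inside `eviction_manager_test.go` would catch interface drift at build time rather than at test runtime. This guard is a hypothetical addition, not part of the patch:

```go
// Compile-time assertions (hypothetical): mockDiskGC must keep satisfying
// both eviction interfaces its pointer is assigned to in the tests above.
var (
	_ ImageGC     = &mockDiskGC{}
	_ ContainerGC = &mockDiskGC{}
)
```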
diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go
index 583097f37bc..4c9532c6146 100644
--- a/pkg/kubelet/eviction/helpers.go
+++ b/pkg/kubelet/eviction/helpers.go
@@ -1019,32 +1019,34 @@ func PodIsEvicted(podStatus v1.PodStatus) bool {
 }
 
 // buildResourceToNodeReclaimFuncs returns reclaim functions associated with resources.
-func buildResourceToNodeReclaimFuncs(imageGC ImageGC, withImageFs bool) map[v1.ResourceName]nodeReclaimFuncs {
+func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool) map[v1.ResourceName]nodeReclaimFuncs {
 	resourceToReclaimFunc := map[v1.ResourceName]nodeReclaimFuncs{}
 	// usage of an imagefs is optional
 	if withImageFs {
 		// with an imagefs, nodefs pressure should just delete logs
-		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs()}
-		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteLogs()}
+		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
+		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
 		// with an imagefs, imagefs pressure should delete unused images
-		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteImages(imageGC, true)}
-		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteImages(imageGC, false)}
+		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
+		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
 	} else {
 		// without an imagefs, nodefs pressure should delete logs, and unused images
 		// since imagefs and nodefs share a common device, they share common reclaim functions
-		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, true)}
-		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, false)}
-		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, true)}
-		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, false)}
+		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
+		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
+		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
+		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
 	}
 	return resourceToReclaimFunc
 }
 
-// deleteLogs will delete logs to free up disk pressure.
-func deleteLogs() nodeReclaimFunc {
+// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
+func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
 	return func() (*resource.Quantity, error) {
-		// TODO: not yet supported.
-		return resource.NewQuantity(int64(0), resource.BinarySI), nil
+		glog.Infof("eviction manager: attempting to delete unused containers")
+		err := containerGC.DeleteAllUnusedContainers()
+		// Calculating bytes freed is not yet supported.
+		return resource.NewQuantity(int64(0), resource.BinarySI), err
 	}
 }
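Editor's note: `deleteTerminatedContainers` is deliberately listed before `deleteImages` in every `nodeReclaimFuncs` slice; removing dead containers releases references to their images, so the image GC pass that follows can reclaim more. The runnable sketch below shows how such an ordered list is consumed; the threshold callback, names, and numbers are illustrative, and the real manager re-observes node usage between steps rather than tracking a local counter.

```go
package main

import "fmt"

// A reclaim function reports bytes freed (zero when unknown, as with
// deleteTerminatedContainers above).
type nodeReclaimFunc func() (int64, error)
type nodeReclaimFuncs []nodeReclaimFunc

// reclaimNodeLevelResources runs the signal's reclaim functions in order and
// checks after each one whether pressure has already been relieved, so pod
// evictions happen only as a last resort.
func reclaimNodeLevelResources(funcs nodeReclaimFuncs, usageAboveThreshold func() bool) bool {
	for _, reclaim := range funcs {
		if _, err := reclaim(); err != nil {
			fmt.Println("reclaim error:", err)
			continue // best effort; try the next function
		}
		if !usageAboveThreshold() {
			return true // pressure relieved without killing pods
		}
	}
	return !usageAboveThreshold()
}

func main() {
	diskUsed := int64(95) // GB used, against an illustrative threshold of 80
	deleteContainers := func() (int64, error) { diskUsed -= 10; return 0, nil }
	deleteImages := func() (int64, error) { diskUsed -= 30; return 0, nil }

	// Containers first, then images, matching the ordering installed above.
	relieved := reclaimNodeLevelResources(
		nodeReclaimFuncs{deleteContainers, deleteImages},
		func() bool { return diskUsed > 80 },
	)
	fmt.Println("pressure relieved without pod evictions:", relieved)
}
```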
diff --git a/pkg/kubelet/eviction/types.go b/pkg/kubelet/eviction/types.go
index a5adc4fad8e..8f986eb0d1e 100644
--- a/pkg/kubelet/eviction/types.go
+++ b/pkg/kubelet/eviction/types.go
@@ -81,6 +81,13 @@ type ImageGC interface {
 	DeleteUnusedImages() (int64, error)
 }
 
+// ContainerGC is responsible for performing garbage collection of unused containers.
+type ContainerGC interface {
+	// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
+	// It returns an error if it is unsuccessful.
+	DeleteAllUnusedContainers() error
+}
+
 // KillPodFunc kills a pod.
 // The pod status is updated, and then it is killed with the specified grace period.
 // This function must block until either the pod is killed or an error is encountered.
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index c3a84681185..60b9d3d9acd 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -671,7 +671,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	klet.updatePodCIDR(kubeCfg.PodCIDR)
 
 	// setup containerGC
-	containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy)
+	containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
 	if err != nil {
 		return nil, err
 	}
@@ -761,7 +761,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()
 
 	// setup eviction manager
-	evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.imageManager, kubeDeps.Recorder, nodeRef, klet.clock)
+	evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
 	klet.evictionManager = evictionManager
 	klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
@@ -1184,7 +1184,7 @@ func (kl *Kubelet) setupDataDirs() error {
 func (kl *Kubelet) StartGarbageCollection() {
 	loggedContainerGCFailure := false
 	go wait.Until(func() {
-		if err := kl.containerGC.GarbageCollect(kl.sourcesReady.AllReady()); err != nil {
+		if err := kl.containerGC.GarbageCollect(); err != nil {
 			glog.Errorf("Container garbage collection failed: %v", err)
 			kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
 			loggedContainerGCFailure = true
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index 04a66fc1a7a..943243c1c49 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -270,7 +270,7 @@ func newTestKubeletWithImageList(
 		Namespace: "",
 	}
 	// setup eviction manager
-	evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.imageManager, fakeRecorder, nodeRef, kubelet.clock)
+	evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock)
 	kubelet.evictionManager = evictionManager
 	kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
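Editor's note: with readiness supplied by the `SourcesReadyProvider` at construction time, the kubelet's periodic loop shrinks to a no-argument call. Below is a simplified, runnable stand-in for `StartGarbageCollection`; the real code uses `wait.Until` from k8s.io/apimachinery rather than a hand-rolled ticker, and `loggingGC` is an illustrative implementation.

```go
package main

import (
	"fmt"
	"time"
)

// ContainerGC matches the narrowed interface: the readiness check now lives
// behind the interface instead of being threaded through every caller.
type ContainerGC interface {
	GarbageCollect() error
}

type loggingGC struct{}

func (loggingGC) GarbageCollect() error {
	fmt.Println("container GC pass at", time.Now().Format(time.RFC3339Nano))
	return nil
}

// startGarbageCollection runs GC passes until the stop channel closes.
func startGarbageCollection(gc ContainerGC, period time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		if err := gc.GarbageCollect(); err != nil {
			fmt.Println("container garbage collection failed:", err)
		}
		select {
		case <-ticker.C:
		case <-stop:
			return
		}
	}
}

func main() {
	stop := make(chan struct{})
	go startGarbageCollection(loggingGC{}, 100*time.Millisecond, stop)
	time.Sleep(350 * time.Millisecond)
	close(stop)
}
```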
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/pkg/kubelet/kuberuntime/kuberuntime_gc.go
index 842d25e5f93..45796c26a17 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_gc.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_gc.go
@@ -209,7 +209,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
 }
 
 // evict all containers that are evictable
-func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
+func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
 	// Separate containers by evict units.
 	evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
 	if err != nil {
@@ -219,7 +219,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
 	// Remove deleted pod containers if all sources are ready.
 	if allSourcesReady {
 		for key, unit := range evictUnits {
-			if cgc.isPodDeleted(key.uid) {
+			if cgc.isPodDeleted(key.uid) || evictNonDeletedPods {
 				cgc.removeOldestN(unit, len(unit)) // Remove all.
 				delete(evictUnits, key)
 			}
@@ -261,7 +261,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
 // 2. contains no containers.
 // 3. belong to a non-existent (i.e., already removed) pod, or is not the
 //    most recently created sandbox for the pod.
-func (cgc *containerGC) evictSandboxes() error {
+func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
 	containers, err := cgc.manager.getKubeletContainers(true)
 	if err != nil {
 		return err
 	}
@@ -307,7 +307,7 @@
 	}
 
 	for podUID, sandboxes := range sandboxesByPod {
-		if cgc.isPodDeleted(podUID) {
+		if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
 			// Remove all evictable sandboxes if the pod has been removed.
 			// Note that the latest dead sandbox is also removed if there is
 			// already an active one.
@@ -367,14 +367,14 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
 // * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
 // * gets evictable sandboxes which are not ready and contains no containers.
 // * removes evictable sandboxes.
-func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
+func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
 	// Remove evictable containers
-	if err := cgc.evictContainers(gcPolicy, allSourcesReady); err != nil {
+	if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
 		return err
 	}
 
 	// Remove sandboxes with zero containers
-	if err := cgc.evictSandboxes(); err != nil {
+	if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
 		return err
 	}
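Editor's note: `evictContainers` groups containers into evict units keyed by (pod UID, container name); the patch only widens the removal condition with `evictNonDeletedPods`. The trimmed, runnable sketch below illustrates that decision; the types and helpers are simplified from the real implementation.

```go
package main

import (
	"fmt"
	"sort"
)

// evictUnit mirrors the kuberuntime GC's grouping key.
type evictUnit struct{ podUID, containerName string }

type container struct {
	id        string
	createdAt int64 // unix nanos; smaller = older
}

// removeOldestN mimics the GC helper: drop the n oldest containers in a unit.
func removeOldestN(unit []container, n int) []container {
	sort.Slice(unit, func(i, j int) bool { return unit[i].createdAt < unit[j].createdAt })
	if n > len(unit) {
		n = len(unit)
	}
	for _, c := range unit[:n] {
		fmt.Println("removing container", c.id)
	}
	return unit[n:]
}

// evictContainers sketches the patched decision: a unit is fully removed if
// its pod is deleted, or, under disk pressure, merely terminated.
func evictContainers(units map[evictUnit][]container, isPodDeleted func(string) bool, evictNonDeletedPods bool) {
	for key, unit := range units {
		if isPodDeleted(key.podUID) || evictNonDeletedPods {
			removeOldestN(unit, len(unit)) // remove all
			delete(units, key)
		}
	}
}

func main() {
	units := map[evictUnit][]container{
		{"uid-1", "app"}: {{"c1", 1}, {"c2", 2}},
	}
	// Routine GC: pod uid-1 still exists, so nothing is removed.
	evictContainers(units, func(string) bool { return false }, false)
	fmt.Println("after routine GC:", len(units), "unit(s) left")
	// Disk-pressure GC: evictNonDeletedPods forces removal of dead containers.
	evictContainers(units, func(string) bool { return false }, true)
	fmt.Println("after aggressive GC:", len(units), "unit(s) left")
}
```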
collected.", sandboxes: []sandboxTemplate{ makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, 0), }, - containers: []containerTemplate{}, - remain: []int{0}, + containers: []containerTemplate{}, + remain: []int{0}, + evictNonDeletedPods: false, }, { description: "sandboxes for existing pods should not be garbage collected.", @@ -90,8 +93,19 @@ func TestSandboxGC(t *testing.T) { makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0), makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), }, - containers: []containerTemplate{}, - remain: []int{0, 1}, + containers: []containerTemplate{}, + remain: []int{0, 1}, + evictNonDeletedPods: false, + }, + { + description: "non-running sandboxes for existing pods should be garbage collected if evictNonDeletedPods is set.", + sandboxes: []sandboxTemplate{ + makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0), + makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), + }, + containers: []containerTemplate{}, + remain: []int{0}, + evictNonDeletedPods: true, }, { description: "sandbox with containers should not be garbage collected.", @@ -101,7 +115,8 @@ func TestSandboxGC(t *testing.T) { containers: []containerTemplate{ {pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED}, }, - remain: []int{0}, + remain: []int{0}, + evictNonDeletedPods: false, }, { description: "multiple sandboxes should be handled properly.", @@ -120,7 +135,8 @@ func TestSandboxGC(t *testing.T) { containers: []containerTemplate{ {pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED}, }, - remain: []int{0, 2}, + remain: []int{0, 2}, + evictNonDeletedPods: false, }, } { t.Logf("TestCase #%d: %+v", c, test) @@ -129,7 +145,7 @@ func TestSandboxGC(t *testing.T) { fakeRuntime.SetFakeSandboxes(fakeSandboxes) fakeRuntime.SetFakeContainers(fakeContainers) - err := m.containerGC.evictSandboxes() + err := m.containerGC.evictSandboxes(test.evictNonDeletedPods) assert.NoError(t, err) realRemain, err := fakeRuntime.ListPodSandbox(nil) assert.NoError(t, err) @@ -165,18 +181,20 @@ func TestContainerGC(t *testing.T) { defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6} for c, test := range []struct { - description string // description of the test case - containers []containerTemplate // templates of containers - policy *kubecontainer.ContainerGCPolicy // container gc policy - remain []int // template indexes of remaining containers + description string // description of the test case + containers []containerTemplate // templates of containers + policy *kubecontainer.ContainerGCPolicy // container gc policy + remain []int // template indexes of remaining containers + evictNonDeletedPods bool }{ { description: "all containers should be removed when max container limit is 0", containers: []containerTemplate{ makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, - policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, - remain: []int{}, + policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, + remain: []int{}, + evictNonDeletedPods: false, }, { description: "max containers should be complied when no max per pod container limit is set", @@ -187,8 +205,9 @@ func TestContainerGC(t 
 				makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
-			remain: []int{0, 1, 2, 3},
+			policy:              &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
+			remain:              []int{0, 1, 2, 3},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "no containers should be removed if both max container and per pod container limits are not set",
@@ -197,8 +216,9 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
-			remain: []int{0, 1, 2},
+			policy:              &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
+			remain:              []int{0, 1, 2},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "recently started containers should not be removed",
@@ -207,7 +227,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 1, 2},
+			remain:              []int{0, 1, 2},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "oldest containers should be removed when per pod container limit exceeded",
@@ -216,7 +237,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 1},
+			remain:              []int{0, 1},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "running containers should not be removed",
@@ -225,7 +247,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
 			},
-			remain: []int{0, 1, 2},
+			remain:              []int{0, 1, 2},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "no containers should be removed when limits are not exceeded",
@@ -233,7 +256,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 1},
+			remain:              []int{0, 1},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "max container count should apply per (UID, container) pair",
@@ -248,7 +272,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 1, 3, 4, 6, 7},
+			remain:              []int{0, 1, 3, 4, 6, 7},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "max limit should apply and try to keep from every pod",
@@ -264,7 +289,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 2, 4, 6, 8},
+			remain:              []int{0, 2, 4, 6, 8},
+			evictNonDeletedPods: false,
 		},
 		{
 			description: "oldest pods should be removed if limit exceeded",
@@ -280,7 +306,21 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 2, 4, 6, 8, 9},
+			remain:              []int{0, 2, 4, 6, 8, 9},
+			evictNonDeletedPods: false,
+		},
+		{
+			description: "all non-running containers should be removed when evictNonDeletedPods is set",
+			containers: []containerTemplate{
+				makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
+				makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
+				makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
+				makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
+				makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
+				makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
+			},
+			remain:              []int{5},
+			evictNonDeletedPods: true,
 		},
 		{
 			description: "containers for deleted pods should be removed",
@@ -292,7 +332,8 @@ func TestContainerGC(t *testing.T) {
 				makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
 				makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
 			},
-			remain: []int{0, 1, 2},
+			remain:              []int{0, 1, 2},
+			evictNonDeletedPods: false,
 		},
 	} {
 		t.Logf("TestCase #%d: %+v", c, test)
@@ -302,7 +343,7 @@ func TestContainerGC(t *testing.T) {
 		if test.policy == nil {
 			test.policy = &defaultGCPolicy
 		}
-		err := m.containerGC.evictContainers(*test.policy, true)
+		err := m.containerGC.evictContainers(*test.policy, true, test.evictNonDeletedPods)
 		assert.NoError(t, err)
 		realRemain, err := fakeRuntime.ListContainers(nil)
 		assert.NoError(t, err)
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index ac644e9ddfa..2e05ec02ca4 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -877,8 +877,8 @@ func (m *kubeGenericRuntimeManager) GetNetNS(_ kubecontainer.ContainerID) (strin
 }
 
 // GarbageCollect removes dead containers using the specified container gc policy.
-func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
-	return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady)
+func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
+	return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
 }
 
 // GetPodContainerID gets pod sandbox ID
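Editor's note: runtimes with no differentiated aggressive mode simply accept and discard the new parameter, as the rkt change below does with a blank identifier. A small, runnable illustration of that pattern (all names hypothetical):

```go
package main

import "fmt"

type gcPolicy struct{ maxContainers int }

type runtime interface {
	GarbageCollect(policy gcPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
}

type legacyRuntime struct{}

// The blank identifier "_" documents that evictNonDeletedPods is intentionally
// unused: this runtime's GC behaves identically under disk pressure, while
// the signature still satisfies the runtime interface.
func (legacyRuntime) GarbageCollect(p gcPolicy, allSourcesReady bool, _ bool) error {
	fmt.Println("legacy GC pass, sourcesReady =", allSourcesReady)
	return nil
}

func main() {
	var r runtime = legacyRuntime{}
	_ = r.GarbageCollect(gcPolicy{maxContainers: 6}, true, true)
}
```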
diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go
index 673ab187ec2..d16c92db311 100644
--- a/pkg/kubelet/rkt/rkt.go
+++ b/pkg/kubelet/rkt/rkt.go
@@ -1955,7 +1955,7 @@ func (r *Runtime) getPodSystemdServiceFiles() ([]os.FileInfo, error) {
 // - If the number of containers exceeds gcPolicy.MaxContainers,
 //   then containers whose ages are older than gcPolicy.minAge will
 //   be removed.
-func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
+func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, _ bool) error {
 	var errlist []error
 	var totalInactiveContainers int
 	var inactivePods []*rktapi.Pod
diff --git a/pkg/kubelet/rkt/rkt_test.go b/pkg/kubelet/rkt/rkt_test.go
index e3955e4cf20..46140526855 100644
--- a/pkg/kubelet/rkt/rkt_test.go
+++ b/pkg/kubelet/rkt/rkt_test.go
@@ -1831,7 +1831,8 @@ func TestGarbageCollect(t *testing.T) {
 		}
 
 		allSourcesReady := true
-		err := rkt.GarbageCollect(tt.gcPolicy, allSourcesReady)
+		evictNonDeletedPods := false
+		err := rkt.GarbageCollect(tt.gcPolicy, allSourcesReady, evictNonDeletedPods)
 		assert.NoError(t, err, testCaseHint)
 
 		sort.Sort(sortedStringList(tt.expectedCommands))
diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go
index d23a79f3a7a..6f210c41570 100644
--- a/pkg/kubelet/runonce_test.go
+++ b/pkg/kubelet/runonce_test.go
@@ -126,7 +126,7 @@ func TestRunOnce(t *testing.T) {
 	fakeKillPodFunc := func(pod *v1.Pod, podStatus v1.PodStatus, gracePeriodOverride *int64) error {
 		return nil
 	}
-	evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, nil, kb.recorder, nodeRef, kb.clock)
+	evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock)
 	kb.evictionManager = evictionManager
 	kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)