diff --git a/docs/admin/garbage-collection.md b/docs/admin/garbage-collection.md index b3fcc86460d..372f269ebf6 100644 --- a/docs/admin/garbage-collection.md +++ b/docs/admin/garbage-collection.md @@ -60,7 +60,7 @@ threshold has been met. The policy for garbage collecting containers considers three user-defined variables. `MinAge` is the minimum age at which a container can be garbage collected. `MaxPerPodContainer` is the maximum number of dead containers any single pod (UID, container name) pair is allowed to have. `MaxContainers` is the maximum number of total dead containers. These variables can be individually disabled by setting 'Min Age' to zero and setting 'MaxPerPodContainer' and 'MaxContainers' respectively to less than zero. -kubelet will act on containers that are unidentified or outside of the boundaries set by the previously mentioned flags. The oldest containers will generally be removed first. 'MaxPerPodContainer' and 'MaxContainer' may potentially conflict with each other in situations where retaining the maximum number of containers per pod ('MaxPerPodContainer') would go outside the allowable range of global dead containers ('MaxContainers'). 'MaxPerPodContainer' would be adjusted in this situation: A worst case scenario would be to downgrade 'MaxPerPodContainer' to 1 and evict the oldest containers. +Kubelet will act on containers that are unidentified, that belong to deleted pods, or that are outside of the boundaries set by the previously mentioned flags. The oldest containers will generally be removed first. 'MaxPerPodContainer' and 'MaxContainers' may potentially conflict with each other in situations where retaining the maximum number of containers per pod ('MaxPerPodContainer') would go outside the allowable range of global dead containers ('MaxContainers'). 'MaxPerPodContainer' would be adjusted in this situation: in the worst case, 'MaxPerPodContainer' is downgraded to 1 and the oldest containers are evicted. 
Additionally, containers owned by pods that have been deleted are removed once they are older than `MinAge`. Containers that are not managed by kubelet are not subject to container garbage collection. diff --git a/pkg/kubelet/dockertools/container_gc.go b/pkg/kubelet/dockertools/container_gc.go index 48a0a4c19e9..fae2194b067 100644 --- a/pkg/kubelet/dockertools/container_gc.go +++ b/pkg/kubelet/dockertools/container_gc.go @@ -32,11 +32,16 @@ import ( type containerGC struct { client DockerInterface + podGetter podGetter containerLogsDir string } -func NewContainerGC(client DockerInterface, containerLogsDir string) *containerGC { - return &containerGC{client: client, containerLogsDir: containerLogsDir} +func NewContainerGC(client DockerInterface, podGetter podGetter, containerLogsDir string) *containerGC { + return &containerGC{ + client: client, + podGetter: podGetter, + containerLogsDir: containerLogsDir, + } } // Internal information kept for containers being considered for GC. @@ -190,6 +195,14 @@ func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) } } + // Remove deleted pod containers. + for key, unit := range evictUnits { + if cgc.isPodDeleted(key.uid) { + cgc.removeOldestN(unit, len(unit)) // Remove all. + delete(evictUnits, key) + } + } + // Enforce max containers per evict unit. 
if gcPolicy.MaxPerPodContainer >= 0 { cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer) @@ -231,3 +244,8 @@ func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) return nil } + +func (cgc *containerGC) isPodDeleted(podUID types.UID) bool { + _, found := cgc.podGetter.GetPodByUID(podUID) + return !found +} diff --git a/pkg/kubelet/dockertools/container_gc_test.go b/pkg/kubelet/dockertools/container_gc_test.go index 5e388e38d57..a4e93b39dd8 100644 --- a/pkg/kubelet/dockertools/container_gc_test.go +++ b/pkg/kubelet/dockertools/container_gc_test.go @@ -25,12 +25,15 @@ import ( docker "github.com/fsouza/go-dockerclient" "github.com/stretchr/testify/assert" + "k8s.io/kubernetes/pkg/api" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/types" ) func newTestContainerGC(t *testing.T) (*containerGC, *FakeDockerClient) { fakeDocker := new(FakeDockerClient) - gc := NewContainerGC(fakeDocker, "") + fakePodGetter := newFakePodGetter() + gc := NewContainerGC(fakeDocker, fakePodGetter, "") return gc, fakeDocker } @@ -64,6 +67,19 @@ func makeUndefinedContainer(id string, running bool, created time.Time) *docker. 
} } +func addPods(podGetter podGetter, podUIDs ...types.UID) { + fakePodGetter := podGetter.(*fakePodGetter) + for _, uid := range podUIDs { + fakePodGetter.pods[uid] = &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: "pod" + string(uid), + Namespace: "test", + UID: uid, + }, + } + } +} + func verifyStringArrayEqualsAnyOrder(t *testing.T, actual, expected []string) { act := make([]string, len(actual)) exp := make([]string, len(expected)) @@ -83,6 +99,7 @@ func TestGarbageCollectZeroMaxContainers(t *testing.T) { fakeDocker.SetFakeContainers([]*docker.Container{ makeContainer("1876", "foo", "POD", false, makeTime(0)), }) + addPods(gc.podGetter, "foo") assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{time.Minute, 1, 0})) assert.Len(t, fakeDocker.Removed, 1) @@ -97,6 +114,7 @@ func TestGarbageCollectNoMaxPerPodContainerLimit(t *testing.T) { makeContainer("4876", "foo3", "POD", false, makeTime(3)), makeContainer("5876", "foo4", "POD", false, makeTime(4)), }) + addPods(gc.podGetter, "foo", "foo1", "foo2", "foo3", "foo4") assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{time.Minute, -1, 4})) assert.Len(t, fakeDocker.Removed, 1) @@ -111,6 +129,7 @@ func TestGarbageCollectNoMaxLimit(t *testing.T) { makeContainer("4876", "foo3", "POD", false, makeTime(0)), makeContainer("5876", "foo4", "POD", false, makeTime(0)), }) + addPods(gc.podGetter, "foo", "foo1", "foo2", "foo3", "foo4") assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{time.Minute, 1, -1})) assert.Len(t, fakeDocker.Removed, 0) @@ -210,11 +229,23 @@ func TestGarbageCollect(t *testing.T) { }, expectedRemoved: []string{"1876", "3876", "5876", "7876"}, }, + // Containers for deleted pods should be GC'd. 
+ { + containers: []*docker.Container{ + makeContainer("1876", "foo", "POD", false, makeTime(1)), + makeContainer("2876", "foo", "POD", false, makeTime(2)), + makeContainer("3876", "deleted", "POD", false, makeTime(1)), + makeContainer("4876", "deleted", "POD", false, makeTime(2)), + makeContainer("5876", "deleted", "POD", false, time.Now()), // Deleted pods still respect MinAge. + }, + expectedRemoved: []string{"3876", "4876"}, + }, } for i, test := range tests { t.Logf("Running test case with index %d", i) gc, fakeDocker := newTestContainerGC(t) fakeDocker.SetFakeContainers(test.containers) + addPods(gc.podGetter, "foo", "foo1", "foo2", "foo3", "foo4", "foo5", "foo6", "foo7") assert.Nil(t, gc.GarbageCollect(kubecontainer.ContainerGCPolicy{time.Hour, 2, 6})) verifyStringArrayEqualsAnyOrder(t, fakeDocker.Removed, test.expectedRemoved) } diff --git a/pkg/kubelet/dockertools/fake_manager.go b/pkg/kubelet/dockertools/fake_manager.go index 0c1522db72d..c97db95956f 100644 --- a/pkg/kubelet/dockertools/fake_manager.go +++ b/pkg/kubelet/dockertools/fake_manager.go @@ -18,11 +18,13 @@ package dockertools import ( cadvisorapi "github.com/google/cadvisor/info/v1" + "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/client/record" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/network" proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" kubetypes "k8s.io/kubernetes/pkg/kubelet/types" + "k8s.io/kubernetes/pkg/types" "k8s.io/kubernetes/pkg/util" "k8s.io/kubernetes/pkg/util/oom" "k8s.io/kubernetes/pkg/util/procfs" @@ -45,9 +47,23 @@ func NewFakeDockerManager( fakeOOMAdjuster := oom.NewFakeOOMAdjuster() fakeProcFs := procfs.NewFakeProcFS() - dm := NewDockerManager(client, recorder, livenessManager, containerRefManager, machineInfo, podInfraContainerImage, qps, + fakePodGetter := &fakePodGetter{} + dm := NewDockerManager(client, recorder, livenessManager, containerRefManager, fakePodGetter, machineInfo, podInfraContainerImage, qps, 
burst, containerLogsDir, osInterface, networkPlugin, runtimeHelper, httpClient, &NativeExecHandler{}, fakeOOMAdjuster, fakeProcFs, false, imageBackOff, false, false, true) dm.dockerPuller = &FakeDockerPuller{} return dm } + +type fakePodGetter struct { + pods map[types.UID]*api.Pod +} + +func newFakePodGetter() *fakePodGetter { + return &fakePodGetter{make(map[types.UID]*api.Pod)} +} + +func (f *fakePodGetter) GetPodByUID(uid types.UID) (*api.Pod, bool) { + pod, found := f.pods[uid] + return pod, found +} diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index 8a701f1d0ba..64f3ae833df 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -158,6 +158,11 @@ type DockerManager struct { configureHairpinMode bool } +// A subset of the pod.Manager interface extracted for testing purposes. +type podGetter interface { + GetPodByUID(types.UID) (*api.Pod, bool) +} + func PodInfraContainerEnv(env map[string]string) kubecontainer.Option { return func(rt kubecontainer.Runtime) { dm := rt.(*DockerManager) @@ -175,6 +180,7 @@ func NewDockerManager( recorder record.EventRecorder, livenessManager proberesults.Manager, containerRefManager *kubecontainer.RefManager, + podGetter podGetter, machineInfo *cadvisorapi.MachineInfo, podInfraContainerImage string, qps float32, @@ -232,7 +238,7 @@ func NewDockerManager( } else { dm.imagePuller = kubecontainer.NewImagePuller(kubecontainer.FilterEventRecorder(recorder), dm, imageBackOff) } - dm.containerGC = NewContainerGC(client, containerLogsDir) + dm.containerGC = NewContainerGC(client, podGetter, containerLogsDir) // apply optional settings.. for _, optf := range options { diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index d2ea20e200d..49a9b8458ef 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -131,6 +131,11 @@ const ( // error. It is also used as the base period for the exponential backoff // container restarts and image pulls. 
backOffPeriod = time.Second * 10 + + // Period for performing container garbage collection. + ContainerGCPeriod = time.Minute + // Period for performing image garbage collection. + ImageGCPeriod = 5 * time.Minute ) // SyncHandler is an interface implemented by Kubelet, for testability @@ -363,6 +368,7 @@ func NewMainKubelet( klet.livenessManager = proberesults.NewManager() klet.podCache = kubecontainer.NewCache() + klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient)) // The hairpin mode setting doesn't matter if: // - We're not using a bridge network. This is hard to check because we might @@ -405,6 +411,7 @@ func NewMainKubelet( kubecontainer.FilterEventRecorder(recorder), klet.livenessManager, containerRefManager, + klet.podManager, machineInfo, podInfraContainerImage, pullQPS, @@ -468,7 +475,6 @@ func NewMainKubelet( klet.imageManager = imageManager klet.runner = klet.containerRuntime - klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient)) klet.statusManager = status.NewManager(kubeClient, klet.podManager) klet.probeManager = prober.NewManager( @@ -934,13 +940,13 @@ func (kl *Kubelet) StartGarbageCollection() { if err := kl.containerGC.GarbageCollect(); err != nil { glog.Errorf("Container garbage collection failed: %v", err) } - }, time.Minute, wait.NeverStop) + }, ContainerGCPeriod, wait.NeverStop) go wait.Until(func() { if err := kl.imageManager.GarbageCollect(); err != nil { glog.Errorf("Image garbage collection failed: %v", err) } - }, 5*time.Minute, wait.NeverStop) + }, ImageGCPeriod, wait.NeverStop) } // initializeModules will initialize internal modules that do not require the container runtime to be up.