From ef015c3574b96f14d65c400ed406eeae65c1019f Mon Sep 17 00:00:00 2001 From: Mike Danese Date: Fri, 9 Oct 2015 09:55:49 -0700 Subject: [PATCH 1/2] add config-vm flag to enable terminated pod garbage collection. --- cluster/gce/config-test.sh | 1 + cluster/gce/configure-vm.sh | 5 +++++ cluster/gce/debian/helper.sh | 5 +++++ .../kube-controller-manager.manifest | 8 +++++++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 47bbc070047..8c12a6aced0 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -47,6 +47,7 @@ MASTER_IP_RANGE="${MASTER_IP_RANGE:-10.246.0.0/24}" MINION_SCOPES="${MINION_SCOPES:-compute-rw,monitoring,logging-write,storage-ro}" RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}" ENABLE_EXPERIMENTAL_API="${KUBE_ENABLE_EXPERIMENTAL_API:-false}" +TERMINATED_POD_GC_THRESHOLD=${TERMINATED_POD_GC_THRESHOLD:-100} # Increase the sleep interval value if concerned about API rate limits. 3, in seconds, is the default. 
POLL_SLEEP_INTERVAL=3 diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh index 54e749e6fe8..37488654dab 100755 --- a/cluster/gce/configure-vm.sh +++ b/cluster/gce/configure-vm.sh @@ -322,6 +322,11 @@ EOF if [ -n "${ENABLE_EXPERIMENTAL_API:-}" ]; then cat <<EOF >>/srv/salt-overlay/pillar/cluster-params.sls enable_experimental_api: '$(echo "$ENABLE_EXPERIMENTAL_API" | sed -e "s/'/''/g")' +EOF + fi + if [ -n "${TERMINATED_POD_GC_THRESHOLD:-}" ]; then + cat <<EOF >>/srv/salt-overlay/pillar/cluster-params.sls +terminated_pod_gc_threshold: '$(echo "${TERMINATED_POD_GC_THRESHOLD}" | sed -e "s/'/''/g")' EOF fi } diff --git a/cluster/gce/debian/helper.sh b/cluster/gce/debian/helper.sh index 7fa80edc869..c01848ddab1 100755 --- a/cluster/gce/debian/helper.sh +++ b/cluster/gce/debian/helper.sh @@ -64,6 +64,11 @@ EOF if [ -n "${KUBE_APISERVER_REQUEST_TIMEOUT:-}" ]; then cat >>$file <>$file < Date: Fri, 9 Oct 2015 09:56:30 -0700 Subject: [PATCH 2/2] add slow test for terminated pod garbage collection --- hack/jenkins/e2e.sh | 1 + test/e2e/garbage_collector.go | 82 +++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 test/e2e/garbage_collector.go diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh index bcc8b3c482b..c912b7e75e0 100755 --- a/hack/jenkins/e2e.sh +++ b/hack/jenkins/e2e.sh @@ -148,6 +148,7 @@ GCE_SLOW_TESTS=( "Nodes\sResize" # 3 min 30 sec, file: resize_nodes.go, issue: #13323 "resource\susage\stracking" # 1 hour, file: kubelet_perf.go, slow by design "monotonically\sincreasing\srestart\scount" # 1.5 to 5 min, file: pods.go, slow by design + "Garbage\scollector\sshould" # 7 min, file: garbage_collector.go, slow by design "KubeProxy\sshould\stest\skube-proxy" # 9 min 30 sec, file: kubeproxy.go, issue: #14204 ) diff --git a/test/e2e/garbage_collector.go b/test/e2e/garbage_collector.go new file mode 100644 index 00000000000..a125a16dbdd --- /dev/null +++ b/test/e2e/garbage_collector.go @@ -0,0 +1,82 @@ +/* +Copyright 2015 
The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"time"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/fields"
+	"k8s.io/kubernetes/pkg/labels"
+	"k8s.io/kubernetes/pkg/util"
+)
+
+// This test requires that --terminated-pod-gc-threshold=100 be set on the
+// controller manager. It creates 1000 pods, marks each one Failed, waits for a
+// garbage collection pass and expects exactly 100 pods to be left.
+var _ = Describe("Garbage collector", func() {
+	f := NewFramework("garbage-collector")
+	It("should handle the creation of 1000 pods", func() {
+		SkipUnlessProviderIs("gce")
+
+		const numPods = 1000
+		for i := 1; i <= numPods; i++ {
+			pod, err := createTerminatingPod(f)
+			// Stop on a creation error instead of using the returned pod.
+			if err != nil {
+				Failf("err creating pod: %v", err)
+			}
+			pod.ResourceVersion = ""
+			pod.Status.Phase = api.PodFailed
+			if _, err = f.Client.Pods(f.Namespace.Name).UpdateStatus(pod); err != nil {
+				Failf("err failing pod: %v", err)
+			}
+			if i%50 == 0 {
+				Logf("count: %v", i)
+			}
+		}
+
+		Logf("created: %v", numPods)
+		// This sleep has to be longer than the gcCheckPeriod defined
+		// in pkg/controller/gc/gc_controller.go which is currently
+		// 20 seconds.
+		time.Sleep(30 * time.Second)
+
+		pods, err := f.Client.Pods(f.Namespace.Name).List(labels.Everything(), fields.Everything())
+		Expect(err).NotTo(HaveOccurred())
+		Expect(len(pods.Items)).To(BeNumerically("==", 100))
+	})
+})
+
+// createTerminatingPod creates, in the framework namespace, a busybox pod with
+// a fresh UUID as its name that is assigned to the node "nonexistant-node".
+func createTerminatingPod(f *Framework) (*api.Pod, error) {
+	name := string(util.NewUUID())
+	pod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{Name: name},
+		Spec: api.PodSpec{
+			NodeName: "nonexistant-node",
+			Containers: []api.Container{
+				{Name: name, Image: "beta.gcr.io/google_containers/busybox"},
+			},
+		},
+	}
+	return f.Client.Pods(f.Namespace.Name).Create(pod)
+}