diff --git a/hack/.staticcheck_failures b/hack/.staticcheck_failures
index 28f75e7c0c6..b6ee87ca44a 100644
--- a/hack/.staticcheck_failures
+++ b/hack/.staticcheck_failures
@@ -89,7 +89,6 @@ test/e2e/lifecycle/bootstrap
 test/e2e/manifest
 test/e2e/network
 test/e2e/node
-test/e2e/scalability
 test/e2e/storage
 test/e2e/storage/drivers
 test/e2e/storage/testsuites
diff --git a/test/e2e/BUILD b/test/e2e/BUILD
index 1d43bcb7c33..83465e744b7 100644
--- a/test/e2e/BUILD
+++ b/test/e2e/BUILD
@@ -30,7 +30,6 @@ go_test(
         "//test/e2e/lifecycle/bootstrap:go_default_library",
         "//test/e2e/network:go_default_library",
         "//test/e2e/node:go_default_library",
-        "//test/e2e/scalability:go_default_library",
         "//test/e2e/scheduling:go_default_library",
         "//test/e2e/servicecatalog:go_default_library",
         "//test/e2e/storage:go_default_library",
@@ -110,7 +109,6 @@ filegroup(
         "//test/e2e/network:all-srcs",
         "//test/e2e/node:all-srcs",
         "//test/e2e/perftype:all-srcs",
-        "//test/e2e/scalability:all-srcs",
         "//test/e2e/scheduling:all-srcs",
         "//test/e2e/servicecatalog:all-srcs",
         "//test/e2e/storage:all-srcs",
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index cc60cb1373a..eb6134c6ad9 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -50,7 +50,6 @@ import (
 	_ "k8s.io/kubernetes/test/e2e/lifecycle/bootstrap"
 	_ "k8s.io/kubernetes/test/e2e/network"
 	_ "k8s.io/kubernetes/test/e2e/node"
-	_ "k8s.io/kubernetes/test/e2e/scalability"
 	_ "k8s.io/kubernetes/test/e2e/scheduling"
 	_ "k8s.io/kubernetes/test/e2e/servicecatalog"
 	_ "k8s.io/kubernetes/test/e2e/storage"
diff --git a/test/e2e/scalability/BUILD b/test/e2e/scalability/BUILD
deleted file mode 100644
index 05f097340f9..00000000000
--- a/test/e2e/scalability/BUILD
+++ /dev/null
@@ -1,64 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
-
-go_library(
-    name = "go_default_library",
-    srcs = [
-        "common.go",
-        "density.go",
-        "framework.go",
-        "load.go",
-    ],
-    importpath = "k8s.io/kubernetes/test/e2e/scalability",
-    visibility = ["//visibility:public"],
-    deps = [
-        "//pkg/apis/batch:go_default_library",
-        "//pkg/apis/core:go_default_library",
-        "//pkg/apis/extensions:go_default_library",
-        "//staging/src/k8s.io/api/core/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/net:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
-        "//staging/src/k8s.io/client-go/discovery:go_default_library",
-        "//staging/src/k8s.io/client-go/discovery/cached/memory:go_default_library",
-        "//staging/src/k8s.io/client-go/dynamic:go_default_library",
-        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
-        "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library",
-        "//staging/src/k8s.io/client-go/rest:go_default_library",
-        "//staging/src/k8s.io/client-go/restmapper:go_default_library",
-        "//staging/src/k8s.io/client-go/scale:go_default_library",
-        "//staging/src/k8s.io/client-go/tools/cache:go_default_library",
-        "//staging/src/k8s.io/client-go/transport:go_default_library",
-        "//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
-        "//test/e2e/framework:go_default_library",
-        "//test/e2e/framework/metrics:go_default_library",
-        "//test/e2e/framework/node:go_default_library",
-        "//test/e2e/framework/pod:go_default_library",
-        "//test/e2e/framework/timer:go_default_library",
-        "//test/utils:go_default_library",
-        "//test/utils/image:go_default_library",
-        "//vendor/github.com/onsi/ginkgo:go_default_library",
-        "//vendor/github.com/onsi/gomega:go_default_library",
-    ],
-)
-
-filegroup(
-    name = "package-srcs",
-    srcs = glob(["**"]),
-    tags = ["automanaged"],
-    visibility = ["//visibility:private"],
-)
-
-filegroup(
-    name = "all-srcs",
-    srcs = [":package-srcs"],
-    tags = ["automanaged"],
-    visibility = ["//visibility:public"],
-)
diff --git a/test/e2e/scalability/OWNERS b/test/e2e/scalability/OWNERS
deleted file mode 100644
index f0717a5361c..00000000000
--- a/test/e2e/scalability/OWNERS
+++ /dev/null
@@ -1,11 +0,0 @@
-# See the OWNERS docs at https://go.k8s.io/owners
-
-approvers:
-- gmarek
-- shyamjvs
-- wojtek-t
-reviewers:
-- gmarek
-- shyamjvs
-- timothysc
-- wojtek-t
diff --git a/test/e2e/scalability/common.go b/test/e2e/scalability/common.go
deleted file mode 100644
index 4fac86cb03d..00000000000
--- a/test/e2e/scalability/common.go
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
-Copyright 2018 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package scalability
-
-import "time"
-
-const (
-	// UnreadyNodeToleration denotes time the node can be unreachable/not ready.
-	UnreadyNodeToleration = 15 * time.Minute
-)
diff --git a/test/e2e/scalability/density.go b/test/e2e/scalability/density.go
deleted file mode 100644
index 2ad1805219f..00000000000
--- a/test/e2e/scalability/density.go
+++ /dev/null
@@ -1,1065 +0,0 @@
-/*
-Copyright 2015 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// DEPRECATED.
-// We already migrated all periodic and presubmit tests to ClusterLoader2.
-// We are still keeping this file for optional functionality, but once
-// this is supported in ClusterLoader2 tests, this test will be removed
-// (hopefully in 1.16 release).
-// Please don't add new functionality to this file and instead see: -// https://github.com/kubernetes/perf-tests/tree/master/clusterloader2 - -package scalability - -import ( - "context" - "fmt" - "math" - "os" - "sort" - "strconv" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/fields" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - utiluuid "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/apimachinery/pkg/watch" - clientset "k8s.io/client-go/kubernetes" - scaleclient "k8s.io/client-go/scale" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/workqueue" - "k8s.io/kubernetes/pkg/apis/batch" - api "k8s.io/kubernetes/pkg/apis/core" - "k8s.io/kubernetes/pkg/apis/extensions" - "k8s.io/kubernetes/test/e2e/framework" - e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" - e2enode "k8s.io/kubernetes/test/e2e/framework/node" - e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - "k8s.io/kubernetes/test/e2e/framework/timer" - testutils "k8s.io/kubernetes/test/utils" - imageutils "k8s.io/kubernetes/test/utils/image" - - "github.com/onsi/ginkgo" - "github.com/onsi/gomega" -) - -const ( - // PodStartupLatencyThreshold holds the latency threashold for pod startup - PodStartupLatencyThreshold = 5 * time.Second - // MinSaturationThreshold holds the minimum staturation threashold - MinSaturationThreshold = 2 * time.Minute - // MinPodsPerSecondThroughput holds the minimum pod/sec throughput - MinPodsPerSecondThroughput = 8 - // DensityPollInterval holds the desity of polling interval - DensityPollInterval = 10 * time.Second - // MinPodStartupMeasurements holds the minimum number of measurements related to pod-startup - MinPodStartupMeasurements = 500 -) - -// MaxContainerFailures holds the maximum container failures this test tolerates before failing. -var MaxContainerFailures = 0 - -// MaxMissingPodStartupMeasurements holds the maximum number of missing measurements related to pod-startup that the test tolerates. -var MaxMissingPodStartupMeasurements = 0 - -// Number of nodes in the cluster (computed inside BeforeEach). 
-var nodeCount = 0 - -// DensityTestConfig holds the configurations for e2e scalability tests -type DensityTestConfig struct { - Configs []testutils.RunObjectConfig - ClientSets []clientset.Interface - ScaleClients []scaleclient.ScalesGetter - PollInterval time.Duration - PodCount int - // What kind of resource we want to create - kind schema.GroupKind - SecretConfigs []*testutils.SecretConfig - ConfigMapConfigs []*testutils.ConfigMapConfig - DaemonConfigs []*testutils.DaemonConfig -} - -type saturationTime struct { - TimeToSaturate time.Duration `json:"timeToSaturate"` - NumberOfNodes int `json:"numberOfNodes"` - NumberOfPods int `json:"numberOfPods"` - Throughput float32 `json:"throughput"` -} - -func (dtc *DensityTestConfig) runSecretConfigs(testPhase *timer.Phase) { - defer testPhase.End() - for _, sc := range dtc.SecretConfigs { - sc.Run() - } -} - -func (dtc *DensityTestConfig) runConfigMapConfigs(testPhase *timer.Phase) { - defer testPhase.End() - for _, cmc := range dtc.ConfigMapConfigs { - cmc.Run() - } -} - -func (dtc *DensityTestConfig) runDaemonConfigs(testPhase *timer.Phase) { - defer testPhase.End() - for _, dc := range dtc.DaemonConfigs { - dc.Run() - } -} - -func (dtc *DensityTestConfig) deleteSecrets(testPhase *timer.Phase) { - defer testPhase.End() - for i := range dtc.SecretConfigs { - dtc.SecretConfigs[i].Stop() - } -} - -func (dtc *DensityTestConfig) deleteConfigMaps(testPhase *timer.Phase) { - defer testPhase.End() - for i := range dtc.ConfigMapConfigs { - dtc.ConfigMapConfigs[i].Stop() - } -} - -func (dtc *DensityTestConfig) deleteDaemonSets(numberOfClients int, testPhase *timer.Phase) { - defer testPhase.End() - for i := range dtc.DaemonConfigs { - framework.ExpectNoError(framework.DeleteResourceAndWaitForGC( - dtc.ClientSets[i%numberOfClients], - extensions.Kind("DaemonSet"), - dtc.DaemonConfigs[i].Namespace, - dtc.DaemonConfigs[i].Name, - )) - } -} - -func density30AddonResourceVerifier(numNodes int) map[string]framework.ResourceConstraint { - var apiserverMem uint64 - var controllerMem uint64 - var schedulerMem uint64 - apiserverCPU := math.MaxFloat32 - apiserverMem = math.MaxUint64 - controllerCPU := math.MaxFloat32 - controllerMem = math.MaxUint64 - schedulerCPU := math.MaxFloat32 - schedulerMem = math.MaxUint64 - framework.Logf("Setting resource constraints for provider: %s", framework.TestContext.Provider) - if framework.ProviderIs("kubemark") { - if numNodes <= 5 { - apiserverCPU = 0.35 - apiserverMem = 150 * (1024 * 1024) - controllerCPU = 0.15 - controllerMem = 100 * (1024 * 1024) - schedulerCPU = 0.05 - schedulerMem = 50 * (1024 * 1024) - } else if numNodes <= 100 { - apiserverCPU = 1.5 - apiserverMem = 1500 * (1024 * 1024) - controllerCPU = 0.5 - controllerMem = 500 * (1024 * 1024) - schedulerCPU = 0.4 - schedulerMem = 180 * (1024 * 1024) - } else if numNodes <= 500 { - apiserverCPU = 3.5 - apiserverMem = 3400 * (1024 * 1024) - controllerCPU = 1.3 - controllerMem = 1100 * (1024 * 1024) - schedulerCPU = 1.5 - schedulerMem = 500 * (1024 * 1024) - } else if numNodes <= 1000 { - apiserverCPU = 5.5 - apiserverMem = 4000 * (1024 * 1024) - controllerCPU = 3 - controllerMem = 2000 * (1024 * 1024) - schedulerCPU = 1.5 - schedulerMem = 750 * (1024 * 1024) - } - } else { - if numNodes <= 100 { - apiserverCPU = 2.2 - apiserverMem = 1700 * (1024 * 1024) - controllerCPU = 0.8 - controllerMem = 530 * (1024 * 1024) - schedulerCPU = 0.4 - schedulerMem = 180 * (1024 * 1024) - } - } - - constraints := make(map[string]framework.ResourceConstraint) - 
constraints["fluentd-elasticsearch"] = framework.ResourceConstraint{ - CPUConstraint: 0.2, - MemoryConstraint: 250 * (1024 * 1024), - } - constraints["elasticsearch-logging"] = framework.ResourceConstraint{ - CPUConstraint: 2, - // TODO: bring it down to 750MB again, when we lower Kubelet verbosity level. I.e. revert #19164 - MemoryConstraint: 5000 * (1024 * 1024), - } - constraints["heapster"] = framework.ResourceConstraint{ - CPUConstraint: 2, - MemoryConstraint: 1800 * (1024 * 1024), - } - constraints["kibana-logging"] = framework.ResourceConstraint{ - CPUConstraint: 0.2, - MemoryConstraint: 100 * (1024 * 1024), - } - constraints["kube-proxy"] = framework.ResourceConstraint{ - CPUConstraint: 0.15, - MemoryConstraint: 100 * (1024 * 1024), - } - constraints["l7-lb-controller"] = framework.ResourceConstraint{ - CPUConstraint: 0.2 + 0.00015*float64(numNodes), - MemoryConstraint: (75 + uint64(math.Ceil(0.8*float64(numNodes)))) * (1024 * 1024), - } - constraints["influxdb"] = framework.ResourceConstraint{ - CPUConstraint: 2, - MemoryConstraint: 500 * (1024 * 1024), - } - constraints["kube-apiserver"] = framework.ResourceConstraint{ - CPUConstraint: apiserverCPU, - MemoryConstraint: apiserverMem, - } - constraints["kube-controller-manager"] = framework.ResourceConstraint{ - CPUConstraint: controllerCPU, - MemoryConstraint: controllerMem, - } - constraints["kube-scheduler"] = framework.ResourceConstraint{ - CPUConstraint: schedulerCPU, - MemoryConstraint: schedulerMem, - } - constraints["coredns"] = framework.ResourceConstraint{ - CPUConstraint: framework.NoCPUConstraint, - MemoryConstraint: 170 * (1024 * 1024), - } - constraints["kubedns"] = framework.ResourceConstraint{ - CPUConstraint: framework.NoCPUConstraint, - MemoryConstraint: 170 * (1024 * 1024), - } - return constraints -} - -func computeAverage(sample []float64) float64 { - sum := 0.0 - for _, value := range sample { - sum += value - } - return sum / float64(len(sample)) -} - -func computeQuantile(sample []float64, quantile float64) float64 { - framework.ExpectEqual(sort.Float64sAreSorted(sample), true) - framework.ExpectEqual(quantile >= 0.0 && quantile <= 1.0, true) - index := int(quantile*float64(len(sample))) - 1 - if index < 0 { - return math.NaN() - } - return sample[index] -} - -func logPodStartupStatus( - c clientset.Interface, - expectedPods int, - observedLabels map[string]string, - period time.Duration, - scheduleThroughputs *[]float64, - stopCh chan struct{}) { - - label := labels.SelectorFromSet(labels.Set(observedLabels)) - podStore, err := testutils.NewPodStore(c, metav1.NamespaceAll, label, fields.Everything()) - framework.ExpectNoError(err) - defer podStore.Stop() - - ticker := time.NewTicker(period) - startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods) - lastScheduledCount := startupStatus.Scheduled - defer ticker.Stop() - for { - select { - case <-ticker.C: - case <-stopCh: - return - } - // Log status of the pods. - startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods) - framework.Logf(startupStatus.String("Density")) - // Compute scheduling throughput for the latest time period. 
- throughput := float64(startupStatus.Scheduled-lastScheduledCount) / float64(period/time.Second) - *scheduleThroughputs = append(*scheduleThroughputs, throughput) - lastScheduledCount = startupStatus.Scheduled - } -} - -// runDensityTest will perform a density test and return the time it took for -// all pods to start -func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer, scheduleThroughputs *[]float64) time.Duration { - defer ginkgo.GinkgoRecover() - - // Create all secrets, configmaps and daemons. - dtc.runSecretConfigs(testPhaseDurations.StartPhase(250, "secrets creation")) - dtc.runConfigMapConfigs(testPhaseDurations.StartPhase(260, "configmaps creation")) - dtc.runDaemonConfigs(testPhaseDurations.StartPhase(270, "daemonsets creation")) - - replicationCtrlStartupPhase := testPhaseDurations.StartPhase(300, "saturation pods creation") - defer replicationCtrlStartupPhase.End() - - // Start scheduler CPU profile-gatherer before we begin cluster saturation. - profileGatheringDelay := time.Duration(1+nodeCount/100) * time.Minute - schedulerProfilingStopCh := framework.StartCPUProfileGatherer("kube-scheduler", "density", profileGatheringDelay) - - // Start all replication controllers. - startTime := time.Now() - wg := sync.WaitGroup{} - wg.Add(len(dtc.Configs)) - for i := range dtc.Configs { - config := dtc.Configs[i] - go func() { - defer ginkgo.GinkgoRecover() - // Call wg.Done() in defer to avoid blocking whole test - // in case of error from RunRC. - defer wg.Done() - framework.ExpectNoError(config.Run()) - }() - } - logStopCh := make(chan struct{}) - go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, scheduleThroughputs, logStopCh) - wg.Wait() - startupTime := time.Since(startTime) - close(logStopCh) - close(schedulerProfilingStopCh) - framework.Logf("E2E startup time for %d pods: %v", dtc.PodCount, startupTime) - framework.Logf("Throughput (pods/s) during cluster saturation phase: %v", float32(dtc.PodCount)/float32(startupTime/time.Second)) - replicationCtrlStartupPhase.End() - - // Grabbing scheduler memory profile after cluster saturation finished. 
- wg.Add(1) - framework.GatherMemoryProfile("kube-scheduler", "density", &wg) - wg.Wait() - - printPodAllocationPhase := testPhaseDurations.StartPhase(400, "printing pod allocation") - defer printPodAllocationPhase.End() - // Print some data about Pod to Node allocation - ginkgo.By("Printing Pod to Node allocation data") - podList, err := dtc.ClientSets[0].CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{}) - framework.ExpectNoError(err) - pausePodAllocation := make(map[string]int) - systemPodAllocation := make(map[string][]string) - for _, pod := range podList.Items { - if pod.Namespace == metav1.NamespaceSystem { - systemPodAllocation[pod.Spec.NodeName] = append(systemPodAllocation[pod.Spec.NodeName], pod.Name) - } else { - pausePodAllocation[pod.Spec.NodeName]++ - } - } - nodeNames := make([]string, 0) - for k := range pausePodAllocation { - nodeNames = append(nodeNames, k) - } - sort.Strings(nodeNames) - for _, node := range nodeNames { - framework.Logf("%v: %v pause pods, system pods: %v", node, pausePodAllocation[node], systemPodAllocation[node]) - } - defer printPodAllocationPhase.End() - return startupTime -} - -func cleanupDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer) { - defer ginkgo.GinkgoRecover() - podCleanupPhase := testPhaseDurations.StartPhase(900, "latency pods deletion") - defer podCleanupPhase.End() - ginkgo.By("Deleting created Collections") - numberOfClients := len(dtc.ClientSets) - // We explicitly delete all pods to have API calls necessary for deletion accounted in metrics. - for i := range dtc.Configs { - name := dtc.Configs[i].GetName() - namespace := dtc.Configs[i].GetNamespace() - kind := dtc.Configs[i].GetKind() - ginkgo.By(fmt.Sprintf("Cleaning up only the %v, garbage collector will clean up the pods", kind)) - err := framework.DeleteResourceAndWaitForGC(dtc.ClientSets[i%numberOfClients], kind, namespace, name) - framework.ExpectNoError(err) - } - podCleanupPhase.End() - - dtc.deleteSecrets(testPhaseDurations.StartPhase(910, "secrets deletion")) - dtc.deleteConfigMaps(testPhaseDurations.StartPhase(920, "configmaps deletion")) - dtc.deleteDaemonSets(numberOfClients, testPhaseDurations.StartPhase(930, "daemonsets deletion")) -} - -// This test suite can take a long time to run, and can affect or be affected by other tests. -// So by default it is added to the ginkgo.skip list (see driver.go). -// To run this suite you must explicitly ask for it by setting the -// -t/--test flag or ginkgo.focus flag. -// IMPORTANT: This test is designed to work on large (>= 100 Nodes) clusters. For smaller ones -// results will not be representative for control-plane performance as we'll start hitting -// limits on Docker's concurrent container startup. -var _ = SIGDescribe("Density", func() { - var c clientset.Interface - var additionalPodsPrefix string - var ns string - var uuid string - var e2eStartupTime time.Duration - var totalPods int - var nodeCPUCapacity int64 - var nodeMemCapacity int64 - var nodes *v1.NodeList - var scheduleThroughputs []float64 - - testCaseBaseName := "density" - missingMeasurements := 0 - var testPhaseDurations *timer.TestPhaseTimer - var profileGathererStopCh chan struct{} - var etcdMetricsCollector *e2emetrics.EtcdMetricsCollector - - // Gathers data prior to framework namespace teardown - ginkgo.AfterEach(func() { - // Stop apiserver CPU profile gatherer and gather memory allocations profile. 
- close(profileGathererStopCh) - wg := sync.WaitGroup{} - wg.Add(1) - framework.GatherMemoryProfile("kube-apiserver", "density", &wg) - wg.Wait() - - saturationThreshold := time.Duration((totalPods / MinPodsPerSecondThroughput)) * time.Second - if saturationThreshold < MinSaturationThreshold { - saturationThreshold = MinSaturationThreshold - } - gomega.Expect(e2eStartupTime).NotTo(gomega.BeNumerically(">", saturationThreshold)) - saturationData := saturationTime{ - TimeToSaturate: e2eStartupTime, - NumberOfNodes: nodeCount, - NumberOfPods: totalPods, - Throughput: float32(totalPods) / float32(e2eStartupTime/time.Second), - } - framework.Logf("Cluster saturation time: %s", framework.PrettyPrintJSON(saturationData)) - - summaries := make([]framework.TestDataSummary, 0, 2) - // Verify latency metrics. - highLatencyRequests, metrics, err := e2emetrics.HighLatencyRequests(c, nodeCount) - framework.ExpectNoError(err) - if err == nil { - summaries = append(summaries, metrics) - } - - // Summarize scheduler metrics. - latency, err := e2emetrics.VerifySchedulerLatency(c, framework.TestContext.Provider, framework.TestContext.CloudConfig.MasterName, framework.GetMasterHost()) - framework.ExpectNoError(err) - if err == nil { - // Compute avg and quantiles of throughput (excluding last element, that's usually an outlier). - sampleSize := len(scheduleThroughputs) - if sampleSize > 1 { - scheduleThroughputs = scheduleThroughputs[:sampleSize-1] - sort.Float64s(scheduleThroughputs) - latency.ThroughputAverage = computeAverage(scheduleThroughputs) - latency.ThroughputPerc50 = computeQuantile(scheduleThroughputs, 0.5) - latency.ThroughputPerc90 = computeQuantile(scheduleThroughputs, 0.9) - latency.ThroughputPerc99 = computeQuantile(scheduleThroughputs, 0.99) - } - summaries = append(summaries, latency) - } - - // Summarize etcd metrics. - err = etcdMetricsCollector.StopAndSummarize(framework.TestContext.Provider, framework.GetMasterHost()) - framework.ExpectNoError(err) - if err == nil { - summaries = append(summaries, etcdMetricsCollector.GetMetrics()) - } - - summaries = append(summaries, testPhaseDurations) - - framework.PrintSummaries(summaries, testCaseBaseName) - - // Fail if there were some high-latency requests. - gomega.Expect(highLatencyRequests).NotTo(gomega.BeNumerically(">", 0), "There should be no high-latency requests") - // Fail if more than the allowed threshold of measurements were missing in the latencyTest. - framework.ExpectEqual(missingMeasurements <= MaxMissingPodStartupMeasurements, true) - }) - - options := framework.Options{ - ClientQPS: 50.0, - ClientBurst: 100, - } - // Explicitly put here, to delete namespace at the end of the test - // (after measuring latency metrics, etc.). - f := framework.NewFramework(testCaseBaseName, options, nil) - f.NamespaceDeletionTimeout = time.Hour - - ginkgo.BeforeEach(func() { - // Gathering the metrics currently uses a path which uses SSH. - framework.SkipUnlessSSHKeyPresent() - - var err error - c = f.ClientSet - ns = f.Namespace.Name - testPhaseDurations = timer.NewTestPhaseTimer() - - // This is used to mimic what new service account token volumes will - // eventually look like. We can remove this once the controller manager - // publishes the root CA certificate to each namespace. 
- c.CoreV1().ConfigMaps(ns).Create(&v1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "kube-root-ca-crt", - }, - Data: map[string]string{ - "ca.crt": "trust me, i'm a ca.crt", - }, - }) - - _, nodes, err = e2enode.GetMasterAndWorkerNodes(c) - if err != nil { - framework.Logf("Unexpected error occurred: %v", err) - } - // TODO: write a wrapper for ExpectNoErrorWithOffset() - framework.ExpectNoErrorWithOffset(0, err) - nodeCount = len(nodes.Items) - gomega.Expect(nodeCount).NotTo(gomega.BeZero()) - - // Compute node capacity, leaving some slack for addon pods. - nodeCPUCapacity = nodes.Items[0].Status.Allocatable.Cpu().MilliValue() - 100 - nodeMemCapacity = nodes.Items[0].Status.Allocatable.Memory().Value() - 100*1024*1024 - - // Terminating a namespace (deleting the remaining objects from it - which - // generally means events) can affect the current run. Thus we wait for all - // terminating namespace to be finally deleted before starting this test. - err = framework.CheckTestingNSDeletedExcept(c, ns) - framework.ExpectNoError(err) - - uuid = string(utiluuid.NewUUID()) - - framework.ExpectNoError(e2emetrics.ResetSchedulerMetrics(c, framework.TestContext.Provider, framework.TestContext.CloudConfig.MasterName, framework.GetMasterHost())) - framework.ExpectNoError(e2emetrics.ResetMetrics(c)) - framework.ExpectNoError(os.Mkdir(fmt.Sprintf(framework.TestContext.OutputDir+"/%s", uuid), 0777)) - - framework.Logf("Listing nodes for easy debugging:\n") - for _, node := range nodes.Items { - var internalIP, externalIP string - for _, address := range node.Status.Addresses { - if address.Type == v1.NodeInternalIP { - internalIP = address.Address - } - if address.Type == v1.NodeExternalIP { - externalIP = address.Address - } - } - framework.Logf("Name: %v, clusterIP: %v, externalIP: %v", node.ObjectMeta.Name, internalIP, externalIP) - } - - // Start apiserver CPU profile gatherer with frequency based on cluster size. - profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute - profileGathererStopCh = framework.StartCPUProfileGatherer("kube-apiserver", "density", profileGatheringDelay) - - // Start etcs metrics collection. - etcdMetricsCollector = e2emetrics.NewEtcdMetricsCollector() - etcdMetricsCollector.StartCollecting(time.Minute, framework.TestContext.Provider, framework.GetMasterHost()) - }) - - type Density struct { - // Controls if e2e latency tests should be run (they are slow) - runLatencyTest bool - podsPerNode int - // Controls how often the apiserver is polled for pods - interval time.Duration - // What kind of resource we should be creating. Default: ReplicationController - kind schema.GroupKind - secretsPerPod int - configMapsPerPod int - svcacctTokenProjectionsPerPod int - daemonsPerNode int - quotas bool - } - - densityTests := []Density{ - // TODO: Expose runLatencyTest as ginkgo flag. 
- {podsPerNode: 3, runLatencyTest: false, kind: api.Kind("ReplicationController")}, - {podsPerNode: 30, runLatencyTest: true, kind: api.Kind("ReplicationController")}, - {podsPerNode: 50, runLatencyTest: false, kind: api.Kind("ReplicationController")}, - {podsPerNode: 95, runLatencyTest: true, kind: api.Kind("ReplicationController")}, - {podsPerNode: 100, runLatencyTest: false, kind: api.Kind("ReplicationController")}, - // Tests for other resource types: - {podsPerNode: 30, runLatencyTest: true, kind: extensions.Kind("Deployment")}, - {podsPerNode: 30, runLatencyTest: true, kind: batch.Kind("Job")}, - // Test scheduling when daemons are preset - {podsPerNode: 30, runLatencyTest: true, kind: api.Kind("ReplicationController"), daemonsPerNode: 2}, - // Test with secrets - {podsPerNode: 30, runLatencyTest: true, kind: extensions.Kind("Deployment"), secretsPerPod: 2}, - // Test with configmaps - {podsPerNode: 30, runLatencyTest: true, kind: extensions.Kind("Deployment"), configMapsPerPod: 2}, - // Test with service account projected volumes - {podsPerNode: 30, runLatencyTest: true, kind: extensions.Kind("Deployment"), svcacctTokenProjectionsPerPod: 2}, - // Test with quotas - {podsPerNode: 30, runLatencyTest: true, kind: api.Kind("ReplicationController"), quotas: true}, - } - - isCanonical := func(test *Density) bool { - return test.kind == api.Kind("ReplicationController") && test.daemonsPerNode == 0 && test.secretsPerPod == 0 && test.configMapsPerPod == 0 && !test.quotas - } - - for _, testArg := range densityTests { - feature := "ManualPerformance" - switch testArg.podsPerNode { - case 30: - if isCanonical(&testArg) { - feature = "Performance" - } - case 95: - feature = "HighDensityPerformance" - } - - name := fmt.Sprintf("[Feature:%s] should allow starting %d pods per node using %v with %v secrets, %v configmaps, %v token projections, and %v daemons", - feature, - testArg.podsPerNode, - testArg.kind, - testArg.secretsPerPod, - testArg.configMapsPerPod, - testArg.svcacctTokenProjectionsPerPod, - testArg.daemonsPerNode, - ) - if testArg.quotas { - name += " with quotas" - } - itArg := testArg - ginkgo.It(name, func() { - nodePrepPhase := testPhaseDurations.StartPhase(100, "node preparation") - defer nodePrepPhase.End() - nodePreparer := framework.NewE2ETestNodePreparer( - f.ClientSet, - []testutils.CountToStrategy{{Count: nodeCount, Strategy: &testutils.TrivialNodePrepareStrategy{}}}, - ) - framework.ExpectNoError(nodePreparer.PrepareNodes()) - defer nodePreparer.CleanupNodes() - - podsPerNode := itArg.podsPerNode - if podsPerNode == 30 { - f.AddonResourceConstraints = func() map[string]framework.ResourceConstraint { return density30AddonResourceVerifier(nodeCount) }() - } - totalPods = (podsPerNode - itArg.daemonsPerNode) * nodeCount - fileHndl, err := os.Create(fmt.Sprintf(framework.TestContext.OutputDir+"/%s/pod_states.csv", uuid)) - framework.ExpectNoError(err) - defer fileHndl.Close() - nodePrepPhase.End() - - // nodeCountPerNamespace and CreateNamespaces are defined in load.go - numberOfCollections := (nodeCount + nodeCountPerNamespace - 1) / nodeCountPerNamespace - namespaces, err := CreateNamespaces(f, numberOfCollections, fmt.Sprintf("density-%v", testArg.podsPerNode), testPhaseDurations.StartPhase(200, "namespace creation")) - framework.ExpectNoError(err) - if itArg.quotas { - framework.ExpectNoError(CreateQuotas(f, namespaces, totalPods+nodeCount, testPhaseDurations.StartPhase(210, "quota creation"))) - } - - configs := make([]testutils.RunObjectConfig, numberOfCollections) - 
secretConfigs := make([]*testutils.SecretConfig, 0, numberOfCollections*itArg.secretsPerPod) - configMapConfigs := make([]*testutils.ConfigMapConfig, 0, numberOfCollections*itArg.configMapsPerPod) - // Since all RCs are created at the same time, timeout for each config - // has to assume that it will be run at the very end. - podThroughput := 20 - timeout := time.Duration(totalPods/podThroughput) * time.Second - if timeout < UnreadyNodeToleration { - timeout = UnreadyNodeToleration - } - timeout += 3 * time.Minute - // createClients is defined in load.go - clients, scalesClients, err := createClients(numberOfCollections) - framework.ExpectNoError(err) - for i := 0; i < numberOfCollections; i++ { - nsName := namespaces[i].Name - secretNames := []string{} - for j := 0; j < itArg.secretsPerPod; j++ { - secretName := fmt.Sprintf("density-secret-%v-%v", i, j) - secretConfigs = append(secretConfigs, &testutils.SecretConfig{ - Content: map[string]string{"foo": "bar"}, - Client: clients[i], - Name: secretName, - Namespace: nsName, - LogFunc: framework.Logf, - }) - secretNames = append(secretNames, secretName) - } - configMapNames := []string{} - for j := 0; j < itArg.configMapsPerPod; j++ { - configMapName := fmt.Sprintf("density-configmap-%v-%v", i, j) - configMapConfigs = append(configMapConfigs, &testutils.ConfigMapConfig{ - Content: map[string]string{"foo": "bar"}, - Client: clients[i], - Name: configMapName, - Namespace: nsName, - LogFunc: framework.Logf, - }) - configMapNames = append(configMapNames, configMapName) - } - name := fmt.Sprintf("density%v-%v-%v", totalPods, i, uuid) - baseConfig := &testutils.RCConfig{ - Client: clients[i], - ScalesGetter: scalesClients[i], - Image: imageutils.GetPauseImageName(), - Name: name, - Namespace: nsName, - Labels: map[string]string{"type": "densityPod"}, - PollInterval: DensityPollInterval, - Timeout: timeout, - PodStatusFile: fileHndl, - Replicas: (totalPods + numberOfCollections - 1) / numberOfCollections, - CpuRequest: nodeCPUCapacity / 100, - MemRequest: nodeMemCapacity / 100, - MaxContainerFailures: &MaxContainerFailures, - Silent: true, - LogFunc: framework.Logf, - SecretNames: secretNames, - ConfigMapNames: configMapNames, - ServiceAccountTokenProjections: itArg.svcacctTokenProjectionsPerPod, - Tolerations: []v1.Toleration{ - { - Key: "node.kubernetes.io/not-ready", - Operator: v1.TolerationOpExists, - Effect: v1.TaintEffectNoExecute, - TolerationSeconds: func(i int64) *int64 { return &i }(int64(UnreadyNodeToleration / time.Second)), - }, { - Key: "node.kubernetes.io/unreachable", - Operator: v1.TolerationOpExists, - Effect: v1.TaintEffectNoExecute, - TolerationSeconds: func(i int64) *int64 { return &i }(int64(UnreadyNodeToleration / time.Second)), - }, - }, - } - switch itArg.kind { - case api.Kind("ReplicationController"): - configs[i] = baseConfig - case extensions.Kind("ReplicaSet"): - configs[i] = &testutils.ReplicaSetConfig{RCConfig: *baseConfig} - case extensions.Kind("Deployment"): - configs[i] = &testutils.DeploymentConfig{RCConfig: *baseConfig} - case batch.Kind("Job"): - configs[i] = &testutils.JobConfig{RCConfig: *baseConfig} - default: - framework.Failf("Unsupported kind: %v", itArg.kind) - } - } - - // Single client is running out of http2 connections in delete phase, hence we need more. 
- clients, scalesClients, err = createClients(2) - framework.ExpectNoError(err) - dConfig := DensityTestConfig{ - ClientSets: clients, - ScaleClients: scalesClients, - Configs: configs, - PodCount: totalPods, - PollInterval: DensityPollInterval, - kind: itArg.kind, - SecretConfigs: secretConfigs, - ConfigMapConfigs: configMapConfigs, - } - - for i := 0; i < itArg.daemonsPerNode; i++ { - dConfig.DaemonConfigs = append(dConfig.DaemonConfigs, - &testutils.DaemonConfig{ - Client: f.ClientSet, - Name: fmt.Sprintf("density-daemon-%v", i), - Namespace: f.Namespace.Name, - LogFunc: framework.Logf, - }) - } - e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, &scheduleThroughputs) - defer cleanupDensityTest(dConfig, testPhaseDurations) - - if itArg.runLatencyTest { - // Pick latencyPodsIterations so that: - // latencyPodsIterations * nodeCount >= MinPodStartupMeasurements. - latencyPodsIterations := (MinPodStartupMeasurements + nodeCount - 1) / nodeCount - ginkgo.By(fmt.Sprintf("Scheduling additional %d Pods to measure startup latencies", latencyPodsIterations*nodeCount)) - - createTimes := make(map[string]metav1.Time, 0) - nodeNames := make(map[string]string, 0) - scheduleTimes := make(map[string]metav1.Time, 0) - runTimes := make(map[string]metav1.Time, 0) - watchTimes := make(map[string]metav1.Time, 0) - - var mutex sync.Mutex - checkPod := func(p *v1.Pod) { - mutex.Lock() - defer mutex.Unlock() - defer ginkgo.GinkgoRecover() - - if p.Status.Phase == v1.PodRunning { - if _, found := watchTimes[p.Name]; !found { - watchTimes[p.Name] = metav1.Now() - createTimes[p.Name] = p.CreationTimestamp - nodeNames[p.Name] = p.Spec.NodeName - var startTime metav1.Time - for _, cs := range p.Status.ContainerStatuses { - if cs.State.Running != nil { - if startTime.Before(&cs.State.Running.StartedAt) { - startTime = cs.State.Running.StartedAt - } - } - } - if startTime != metav1.NewTime(time.Time{}) { - runTimes[p.Name] = startTime - } else { - framework.Failf("Pod %v is reported to be running, but none of its containers is", p.Name) - } - } - } - } - - additionalPodsPrefix = "density-latency-pod" - stopCh := make(chan struct{}) - - latencyPodStores := make([]cache.Store, len(namespaces)) - for i := 0; i < len(namespaces); i++ { - nsName := namespaces[i].Name - latencyPodsStore, controller := cache.NewInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.LabelSelector = labels.SelectorFromSet(labels.Set{"type": additionalPodsPrefix}).String() - obj, err := c.CoreV1().Pods(nsName).List(options) - return runtime.Object(obj), err - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.LabelSelector = labels.SelectorFromSet(labels.Set{"type": additionalPodsPrefix}).String() - return c.CoreV1().Pods(nsName).Watch(options) - }, - }, - &v1.Pod{}, - 0, - cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - p, ok := obj.(*v1.Pod) - if !ok { - framework.Logf("Failed to cast observed object to *v1.Pod.") - } - framework.ExpectEqual(ok, true) - go checkPod(p) - }, - UpdateFunc: func(oldObj, newObj interface{}) { - p, ok := newObj.(*v1.Pod) - if !ok { - framework.Logf("Failed to cast observed object to *v1.Pod.") - } - framework.ExpectEqual(ok, true) - go checkPod(p) - }, - }, - ) - latencyPodStores[i] = latencyPodsStore - - go controller.Run(stopCh) - } - for latencyPodsIteration := 0; latencyPodsIteration < latencyPodsIterations; latencyPodsIteration++ { - podIndexOffset := latencyPodsIteration * nodeCount - 
framework.Logf("Creating %d latency pods in range [%d, %d]", nodeCount, podIndexOffset+1, podIndexOffset+nodeCount) - - watchTimesLen := len(watchTimes) - - // Create some additional pods with throughput ~5 pods/sec. - latencyPodStartupPhase := testPhaseDurations.StartPhase(800+latencyPodsIteration*10, "latency pods creation") - defer latencyPodStartupPhase.End() - var wg sync.WaitGroup - wg.Add(nodeCount) - // Explicitly set requests here. - // Thanks to it we trigger increasing priority function by scheduling - // a pod to a node, which in turn will result in spreading latency pods - // more evenly between nodes. - cpuRequest := *resource.NewMilliQuantity(nodeCPUCapacity/5, resource.DecimalSI) - memRequest := *resource.NewQuantity(nodeMemCapacity/5, resource.DecimalSI) - if podsPerNode > 30 { - // This is to make them schedulable on high-density tests - // (e.g. 100 pods/node kubemark). - cpuRequest = *resource.NewMilliQuantity(0, resource.DecimalSI) - memRequest = *resource.NewQuantity(0, resource.DecimalSI) - } - rcNameToNsMap := map[string]string{} - for i := 1; i <= nodeCount; i++ { - name := additionalPodsPrefix + "-" + strconv.Itoa(podIndexOffset+i) - nsName := namespaces[i%len(namespaces)].Name - rcNameToNsMap[name] = nsName - go createRunningPodFromRC(&wg, c, name, nsName, imageutils.GetPauseImageName(), additionalPodsPrefix, cpuRequest, memRequest) - time.Sleep(200 * time.Millisecond) - } - wg.Wait() - latencyPodStartupPhase.End() - - latencyMeasurementPhase := testPhaseDurations.StartPhase(801+latencyPodsIteration*10, "pod startup latencies measurement") - defer latencyMeasurementPhase.End() - ginkgo.By("Waiting for all Pods begin observed by the watch...") - waitTimeout := 10 * time.Minute - for start := time.Now(); len(watchTimes) < watchTimesLen+nodeCount; time.Sleep(10 * time.Second) { - if time.Since(start) < waitTimeout { - framework.Failf("Timeout reached waiting for all Pods being observed by the watch.") - } - } - - nodeToLatencyPods := make(map[string]int) - for i := range latencyPodStores { - for _, item := range latencyPodStores[i].List() { - pod := item.(*v1.Pod) - nodeToLatencyPods[pod.Spec.NodeName]++ - } - for node, count := range nodeToLatencyPods { - if count > 1 { - framework.Logf("%d latency pods scheduled on %s", count, node) - } - } - } - latencyMeasurementPhase.End() - - ginkgo.By("Removing additional replication controllers") - podDeletionPhase := testPhaseDurations.StartPhase(802+latencyPodsIteration*10, "latency pods deletion") - defer podDeletionPhase.End() - deleteRC := func(i int) { - defer ginkgo.GinkgoRecover() - name := additionalPodsPrefix + "-" + strconv.Itoa(podIndexOffset+i+1) - framework.ExpectNoError(framework.DeleteRCAndWaitForGC(c, rcNameToNsMap[name], name)) - } - workqueue.ParallelizeUntil(context.TODO(), 25, nodeCount, deleteRC) - podDeletionPhase.End() - } - close(stopCh) - - for i := 0; i < len(namespaces); i++ { - nsName := namespaces[i].Name - selector := fields.Set{ - "involvedObject.kind": "Pod", - "involvedObject.namespace": nsName, - "source": v1.DefaultSchedulerName, - }.AsSelector().String() - options := metav1.ListOptions{FieldSelector: selector} - schedEvents, err := c.CoreV1().Events(nsName).List(options) - framework.ExpectNoError(err) - for k := range createTimes { - for _, event := range schedEvents.Items { - if event.InvolvedObject.Name == k { - scheduleTimes[k] = event.FirstTimestamp - break - } - } - } - } - - scheduleLag := make([]e2emetrics.PodLatencyData, 0) - startupLag := make([]e2emetrics.PodLatencyData, 0) - 
watchLag := make([]e2emetrics.PodLatencyData, 0) - schedToWatchLag := make([]e2emetrics.PodLatencyData, 0) - e2eLag := make([]e2emetrics.PodLatencyData, 0) - - for name, create := range createTimes { - sched, ok := scheduleTimes[name] - if !ok { - framework.Logf("Failed to find schedule time for %v", name) - missingMeasurements++ - } - run, ok := runTimes[name] - if !ok { - framework.Logf("Failed to find run time for %v", name) - missingMeasurements++ - } - watch, ok := watchTimes[name] - if !ok { - framework.Logf("Failed to find watch time for %v", name) - missingMeasurements++ - } - node, ok := nodeNames[name] - if !ok { - framework.Logf("Failed to find node for %v", name) - missingMeasurements++ - } - - scheduleLag = append(scheduleLag, e2emetrics.PodLatencyData{Name: name, Node: node, Latency: sched.Time.Sub(create.Time)}) - startupLag = append(startupLag, e2emetrics.PodLatencyData{Name: name, Node: node, Latency: run.Time.Sub(sched.Time)}) - watchLag = append(watchLag, e2emetrics.PodLatencyData{Name: name, Node: node, Latency: watch.Time.Sub(run.Time)}) - schedToWatchLag = append(schedToWatchLag, e2emetrics.PodLatencyData{Name: name, Node: node, Latency: watch.Time.Sub(sched.Time)}) - e2eLag = append(e2eLag, e2emetrics.PodLatencyData{Name: name, Node: node, Latency: watch.Time.Sub(create.Time)}) - } - - sort.Sort(e2emetrics.LatencySlice(scheduleLag)) - sort.Sort(e2emetrics.LatencySlice(startupLag)) - sort.Sort(e2emetrics.LatencySlice(watchLag)) - sort.Sort(e2emetrics.LatencySlice(schedToWatchLag)) - sort.Sort(e2emetrics.LatencySlice(e2eLag)) - - e2emetrics.PrintLatencies(scheduleLag, "worst create-to-schedule latencies") - e2emetrics.PrintLatencies(startupLag, "worst schedule-to-run latencies") - e2emetrics.PrintLatencies(watchLag, "worst run-to-watch latencies") - e2emetrics.PrintLatencies(schedToWatchLag, "worst schedule-to-watch latencies") - e2emetrics.PrintLatencies(e2eLag, "worst e2e latencies") - - // Capture latency metrics related to pod-startup. - podStartupLatency := &e2emetrics.PodStartupLatency{ - CreateToScheduleLatency: e2emetrics.ExtractLatencyMetrics(scheduleLag), - ScheduleToRunLatency: e2emetrics.ExtractLatencyMetrics(startupLag), - RunToWatchLatency: e2emetrics.ExtractLatencyMetrics(watchLag), - ScheduleToWatchLatency: e2emetrics.ExtractLatencyMetrics(schedToWatchLag), - E2ELatency: e2emetrics.ExtractLatencyMetrics(e2eLag), - } - f.TestSummaries = append(f.TestSummaries, podStartupLatency) - - // Test whether e2e pod startup time is acceptable. 
- podStartupLatencyThreshold := e2emetrics.LatencyMetric{ - Perc50: PodStartupLatencyThreshold, - Perc90: PodStartupLatencyThreshold, - Perc99: PodStartupLatencyThreshold, - } - framework.ExpectNoError(e2emetrics.VerifyLatencyWithinThreshold(podStartupLatencyThreshold, podStartupLatency.E2ELatency, "pod startup")) - - e2emetrics.LogSuspiciousLatency(startupLag, e2eLag, nodeCount, c) - } - }) - } -}) - -func createRunningPodFromRC(wg *sync.WaitGroup, c clientset.Interface, name, ns, image, podType string, cpuRequest, memRequest resource.Quantity) { - defer ginkgo.GinkgoRecover() - defer wg.Done() - labels := map[string]string{ - "type": podType, - "name": name, - } - rc := &v1.ReplicationController{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: labels, - }, - Spec: v1.ReplicationControllerSpec{ - Replicas: func(i int) *int32 { x := int32(i); return &x }(1), - Selector: labels, - Template: &v1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: name, - Image: image, - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: cpuRequest, - v1.ResourceMemory: memRequest, - }, - }, - }, - }, - DNSPolicy: v1.DNSDefault, - }, - }, - }, - } - framework.ExpectNoError(testutils.CreateRCWithRetries(c, ns, rc)) - framework.ExpectNoError(e2epod.WaitForControlledPodsRunning(c, ns, name, api.Kind("ReplicationController"))) - framework.Logf("Found pod '%s' running", name) -} diff --git a/test/e2e/scalability/framework.go b/test/e2e/scalability/framework.go deleted file mode 100644 index ea123af1a69..00000000000 --- a/test/e2e/scalability/framework.go +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// DEPRECATED. -// We already migrated all periodic and presubmit tests to ClusterLoader2. -// We are still keeping this directory for optional functionality, but once -// this is supported in ClusterLoader2 tests, this test will be removed -// (hopefully in 1.16 release). -// Please don't add new functionality to this directory and instead see: -// https://github.com/kubernetes/perf-tests/tree/master/clusterloader2 - -package scalability - -import "github.com/onsi/ginkgo" - -// SIGDescribe is the entry point for the sig-scalability e2e framework -func SIGDescribe(text string, body func()) bool { - return ginkgo.Describe("[sig-scalability] "+text, body) -} diff --git a/test/e2e/scalability/load.go b/test/e2e/scalability/load.go deleted file mode 100644 index e63d012b744..00000000000 --- a/test/e2e/scalability/load.go +++ /dev/null @@ -1,767 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// DEPRECATED. -// We already migrated all periodic and presubmit tests to ClusterLoader2. -// We are still keeping this file for optional functionality, but once -// this is supported in ClusterLoader2 tests, this test will be removed -// (hopefully in 1.16 release). -// Please don't add new functionality to this file and instead see: -// https://github.com/kubernetes/perf-tests/tree/master/clusterloader2 - -package scalability - -import ( - "context" - "fmt" - "math" - "math/rand" - "net" - "net/http" - "os" - "strconv" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/util/intstr" - utilnet "k8s.io/apimachinery/pkg/util/net" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/discovery" - cacheddiscovery "k8s.io/client-go/discovery/cached/memory" - "k8s.io/client-go/dynamic" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/kubernetes/scheme" - restclient "k8s.io/client-go/rest" - "k8s.io/client-go/restmapper" - scaleclient "k8s.io/client-go/scale" - "k8s.io/client-go/transport" - "k8s.io/client-go/util/workqueue" - "k8s.io/kubernetes/pkg/apis/batch" - api "k8s.io/kubernetes/pkg/apis/core" - "k8s.io/kubernetes/pkg/apis/extensions" - "k8s.io/kubernetes/test/e2e/framework" - e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" - e2enode "k8s.io/kubernetes/test/e2e/framework/node" - "k8s.io/kubernetes/test/e2e/framework/timer" - testutils "k8s.io/kubernetes/test/utils" - - "github.com/onsi/ginkgo" - "github.com/onsi/gomega" -) - -const ( - smallGroupSize = 5 - mediumGroupSize = 30 - bigGroupSize = 250 - smallGroupName = "load-small" - mediumGroupName = "load-medium" - bigGroupName = "load-big" - // We start RCs/Services/pods/... in different namespace in this test. - // nodeCountPerNamespace determines how many namespaces we will be using - // depending on the number of nodes in the underlying cluster. - nodeCountPerNamespace = 100 - // How many threads will be used to create/delete services during this test. - serviceOperationsParallelism = 1 - svcLabelKey = "svc-label" -) - -var randomKind = schema.GroupKind{Kind: "Random"} - -var knownKinds = []schema.GroupKind{ - api.Kind("ReplicationController"), - extensions.Kind("Deployment"), - // TODO: uncomment when Jobs are fixed: #38497 - //batch.Kind("Job"), - extensions.Kind("ReplicaSet"), -} - -// This test suite can take a long time to run, so by default it is added to -// the ginkgo.skip list (see driver.go). -// To run this suite you must explicitly ask for it by setting the -// -t/--test flag or ginkgo.focus flag. 
-var _ = SIGDescribe("Load capacity", func() { - var clientset clientset.Interface - var nodeCount int - var ns string - var configs []testutils.RunObjectConfig - var secretConfigs []*testutils.SecretConfig - var configMapConfigs []*testutils.ConfigMapConfig - - testCaseBaseName := "load" - var testPhaseDurations *timer.TestPhaseTimer - var profileGathererStopCh chan struct{} - - ginkgo.BeforeEach(func() { - framework.SkipUnlessSSHKeyPresent() - }) - - // Gathers metrics before teardown - // TODO add flag that allows to skip cleanup on failure - ginkgo.AfterEach(func() { - // Stop apiserver CPU profile gatherer and gather memory allocations profile. - close(profileGathererStopCh) - wg := sync.WaitGroup{} - wg.Add(1) - framework.GatherMemoryProfile("kube-apiserver", "load", &wg) - wg.Wait() - - // Verify latency metrics - highLatencyRequests, metrics, err := e2emetrics.HighLatencyRequests(clientset, nodeCount) - framework.ExpectNoError(err) - if err == nil { - summaries := make([]framework.TestDataSummary, 0, 2) - summaries = append(summaries, metrics) - summaries = append(summaries, testPhaseDurations) - framework.PrintSummaries(summaries, testCaseBaseName) - gomega.Expect(highLatencyRequests).NotTo(gomega.BeNumerically(">", 0), "There should be no high-latency requests") - } - }) - - // We assume a default throughput of 10 pods/second throughput. - // We may want to revisit it in the future. - // However, this can be overridden by LOAD_TEST_THROUGHPUT env var. - throughput := 10 - if throughputEnv := os.Getenv("LOAD_TEST_THROUGHPUT"); throughputEnv != "" { - if newThroughput, err := strconv.Atoi(throughputEnv); err == nil { - throughput = newThroughput - } - } - - // Explicitly put here, to delete namespace at the end of the test - // (after measuring latency metrics, etc.). - options := framework.Options{ - ClientQPS: float32(math.Max(50.0, float64(2*throughput))), - ClientBurst: int(math.Max(100.0, float64(4*throughput))), - } - f := framework.NewFramework(testCaseBaseName, options, nil) - f.NamespaceDeletionTimeout = time.Hour - - ginkgo.BeforeEach(func() { - testPhaseDurations = timer.NewTestPhaseTimer() - clientset = f.ClientSet - - ns = f.Namespace.Name - - _, err := e2enode.GetRandomReadySchedulableNode(clientset) - framework.ExpectNoError(err) - - // Terminating a namespace (deleting the remaining objects from it - which - // generally means events) can affect the current run. Thus we wait for all - // terminating namespace to be finally deleted before starting this test. - err = framework.CheckTestingNSDeletedExcept(clientset, ns) - framework.ExpectNoError(err) - - framework.ExpectNoError(e2emetrics.ResetMetrics(clientset)) - - // Start apiserver CPU profile gatherer with frequency based on cluster size. - profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute - profileGathererStopCh = framework.StartCPUProfileGatherer("kube-apiserver", "load", profileGatheringDelay) - }) - - type Load struct { - podsPerNode int - image string - command []string - // What kind of resource we want to create - kind schema.GroupKind - services bool - secretsPerPod int - configMapsPerPod int - daemonsPerNode int - quotas bool - } - - serveHostnameCmd := []string{"/agnhost", "serve-hostname"} - loadTests := []Load{ - // The container will consume 1 cpu and 512mb of memory. 
- {podsPerNode: 3, image: "jess/stress", command: []string{"stress", "-c", "1", "-m", "2"}, kind: api.Kind("ReplicationController")}, - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: api.Kind("ReplicationController")}, - // Tests for other resource types - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: extensions.Kind("Deployment")}, - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: batch.Kind("Job")}, - // Test scheduling when daemons are preset - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: api.Kind("ReplicationController"), daemonsPerNode: 2}, - // Test with secrets - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: extensions.Kind("Deployment"), secretsPerPod: 2}, - // Test with configmaps - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: extensions.Kind("Deployment"), configMapsPerPod: 2}, - // Special test case which randomizes created resources - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: randomKind}, - // Test with quotas - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: api.Kind("ReplicationController"), quotas: true}, - {podsPerNode: 30, image: framework.ServeHostnameImage, command: serveHostnameCmd, kind: randomKind, quotas: true}, - } - - isCanonical := func(test *Load) bool { - return test.podsPerNode == 30 && test.kind == api.Kind("ReplicationController") && test.daemonsPerNode == 0 && test.secretsPerPod == 0 && test.configMapsPerPod == 0 && !test.quotas - } - - for _, testArg := range loadTests { - feature := "ManualPerformance" - if isCanonical(&testArg) { - feature = "Performance" - } - name := fmt.Sprintf("[Feature:%s] should be able to handle %v pods per node %v with %v secrets, %v configmaps and %v daemons", - feature, - testArg.podsPerNode, - testArg.kind, - testArg.secretsPerPod, - testArg.configMapsPerPod, - testArg.daemonsPerNode, - ) - if testArg.quotas { - name += " with quotas" - } - itArg := testArg - itArg.services = os.Getenv("CREATE_SERVICES") != "false" - - ginkgo.It(name, func() { - // Create a number of namespaces. 
- namespaceCount := (nodeCount + nodeCountPerNamespace - 1) / nodeCountPerNamespace - namespaces, err := CreateNamespaces(f, namespaceCount, fmt.Sprintf("load-%v-nodepods", itArg.podsPerNode), testPhaseDurations.StartPhase(110, "namespace creation")) - framework.ExpectNoError(err) - - totalPods := (itArg.podsPerNode - itArg.daemonsPerNode) * nodeCount - configs, secretConfigs, configMapConfigs = generateConfigs(totalPods, itArg.image, itArg.command, namespaces, itArg.kind, itArg.secretsPerPod, itArg.configMapsPerPod) - - if itArg.quotas { - framework.ExpectNoError(CreateQuotas(f, namespaces, 2*totalPods, testPhaseDurations.StartPhase(115, "quota creation"))) - } - - f.AddonResourceConstraints = loadResourceConstraints() - - serviceCreationPhase := testPhaseDurations.StartPhase(120, "services creation") - defer serviceCreationPhase.End() - if itArg.services { - framework.Logf("Creating services") - services := generateServicesForConfigs(configs) - createService := func(i int) { - defer ginkgo.GinkgoRecover() - framework.ExpectNoError(testutils.CreateServiceWithRetries(clientset, services[i].Namespace, services[i])) - } - workqueue.ParallelizeUntil(context.TODO(), serviceOperationsParallelism, len(services), createService) - framework.Logf("%v Services created.", len(services)) - defer func(services []*v1.Service) { - serviceCleanupPhase := testPhaseDurations.StartPhase(800, "services deletion") - defer serviceCleanupPhase.End() - framework.Logf("Starting to delete services...") - deleteService := func(i int) { - defer ginkgo.GinkgoRecover() - framework.ExpectNoError(testutils.DeleteResourceWithRetries(clientset, api.Kind("Service"), services[i].Namespace, services[i].Name, nil)) - } - workqueue.ParallelizeUntil(context.TODO(), serviceOperationsParallelism, len(services), deleteService) - framework.Logf("Services deleted") - }(services) - } else { - framework.Logf("Skipping service creation") - } - serviceCreationPhase.End() - // Create all secrets. - secretsCreationPhase := testPhaseDurations.StartPhase(130, "secrets creation") - defer secretsCreationPhase.End() - for i := range secretConfigs { - secretConfigs[i].Run() - defer secretConfigs[i].Stop() - } - secretsCreationPhase.End() - // Create all configmaps. 
- configMapsCreationPhase := testPhaseDurations.StartPhase(140, "configmaps creation") - defer configMapsCreationPhase.End() - for i := range configMapConfigs { - configMapConfigs[i].Run() - defer configMapConfigs[i].Stop() - } - configMapsCreationPhase.End() - // StartDaemon if needed - daemonSetCreationPhase := testPhaseDurations.StartPhase(150, "daemonsets creation") - defer daemonSetCreationPhase.End() - for i := 0; i < itArg.daemonsPerNode; i++ { - daemonName := fmt.Sprintf("load-daemon-%v", i) - daemonConfig := &testutils.DaemonConfig{ - Client: f.ClientSet, - Name: daemonName, - Namespace: f.Namespace.Name, - LogFunc: framework.Logf, - } - daemonConfig.Run() - defer func(config *testutils.DaemonConfig) { - framework.ExpectNoError(framework.DeleteResourceAndWaitForGC( - f.ClientSet, - extensions.Kind("DaemonSet"), - config.Namespace, - config.Name, - )) - }(daemonConfig) - } - daemonSetCreationPhase.End() - - // Simulate lifetime of RC: - // * create with initial size - // * scale RC to a random size and list all pods - // * scale RC to a random size and list all pods - // * delete it - // - // This will generate ~5 creations/deletions per second assuming: - // - X small RCs each 5 pods [ 5 * X = totalPods / 2 ] - // - Y medium RCs each 30 pods [ 30 * Y = totalPods / 4 ] - // - Z big RCs each 250 pods [ 250 * Z = totalPods / 4] - - // We would like to spread creating replication controllers over time - // to make it possible to create/schedule them in the meantime. - // Currently we assume pods/second average throughput. - // We may want to revisit it in the future. - framework.Logf("Starting to create %v objects...", itArg.kind) - creatingTime := time.Duration(totalPods/throughput) * time.Second - - createAllResources(configs, creatingTime, testPhaseDurations.StartPhase(200, "load pods creation")) - ginkgo.By("============================================================================") - - // We would like to spread scaling replication controllers over time - // to make it possible to create/schedule & delete them in the meantime. - // Currently we assume that pods/second average throughput. - - // The expected number of created/deleted pods is totalPods/4 when scaling, - // as each RC changes its size from X to a uniform random value in [X/2, 3X/2]. - scalingTime := time.Duration(totalPods/(4*throughput)) * time.Second - framework.Logf("Starting to scale %v objects first time...", itArg.kind) - scaleAllResources(configs, scalingTime, testPhaseDurations.StartPhase(300, "scaling first time")) - ginkgo.By("============================================================================") - - // Cleanup all created replication controllers. - // Currently we assume pods/second average deletion throughput. - // We may want to revisit it in the future. 
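
The pacing windows follow directly from the assumed creation/deletion throughput. A worked example, with an assumed throughput of 10 pods per second (the actual value came from a package-level setting not shown in this hunk):

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        totalPods := 3000
        throughput := 10 // assumed pods/second

        // Creation and deletion are spread over totalPods/throughput seconds.
        creatingTime := time.Duration(totalPods/throughput) * time.Second

        // Scaling changes each controller from X to a value in [X/2, 3X/2], so the
        // expected churn is totalPods/4 pods, hence a quarter of the window.
        scalingTime := time.Duration(totalPods/(4*throughput)) * time.Second

        fmt.Println("creatingTime:", creatingTime) // 5m0s
        fmt.Println("scalingTime: ", scalingTime)  // 1m15s
    }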
- deletingTime := time.Duration(totalPods/throughput) * time.Second - framework.Logf("Starting to delete %v objects...", itArg.kind) - deleteAllResources(configs, deletingTime, testPhaseDurations.StartPhase(500, "load pods deletion")) - }) - } -}) - -func createClients(numberOfClients int) ([]clientset.Interface, []scaleclient.ScalesGetter, error) { - clients := make([]clientset.Interface, numberOfClients) - scalesClients := make([]scaleclient.ScalesGetter, numberOfClients) - - for i := 0; i < numberOfClients; i++ { - config, err := framework.LoadConfig() - framework.ExpectNoError(err) - config.QPS = 100 - config.Burst = 200 - if framework.TestContext.KubeAPIContentType != "" { - config.ContentType = framework.TestContext.KubeAPIContentType - } - - // For the purpose of this test, we want to force that clients - // do not share underlying transport (which is a default behavior - // in Kubernetes). Thus, we are explicitly creating transport for - // each client here. - transportConfig, err := config.TransportConfig() - if err != nil { - return nil, nil, err - } - tlsConfig, err := transport.TLSConfigFor(transportConfig) - if err != nil { - return nil, nil, err - } - config.Transport = utilnet.SetTransportDefaults(&http.Transport{ - Proxy: http.ProxyFromEnvironment, - TLSHandshakeTimeout: 10 * time.Second, - TLSClientConfig: tlsConfig, - MaxIdleConnsPerHost: 100, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - }) - config.WrapTransport = transportConfig.WrapTransport - config.Dial = transportConfig.Dial - // Overwrite TLS-related fields from config to avoid collision with - // Transport field. - config.TLSClientConfig = restclient.TLSClientConfig{} - config.AuthProvider = nil - config.ExecProvider = nil - - c, err := clientset.NewForConfig(config) - if err != nil { - return nil, nil, err - } - clients[i] = c - - // create scale client, if GroupVersion or NegotiatedSerializer are not set - // assign default values - these fields are mandatory (required by RESTClientFor). - if config.GroupVersion == nil { - config.GroupVersion = &schema.GroupVersion{} - } - if config.NegotiatedSerializer == nil { - config.NegotiatedSerializer = scheme.Codecs - } - restClient, err := restclient.RESTClientFor(config) - if err != nil { - return nil, nil, err - } - discoClient, err := discovery.NewDiscoveryClientForConfig(config) - if err != nil { - return nil, nil, err - } - cachedDiscoClient := cacheddiscovery.NewMemCacheClient(discoClient) - restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscoClient) - restMapper.Reset() - resolver := scaleclient.NewDiscoveryScaleKindResolver(cachedDiscoClient) - scalesClients[i] = scaleclient.New(restClient, restMapper, dynamic.LegacyAPIPathResolverFunc, resolver) - } - return clients, scalesClients, nil -} - -func computePodCounts(total int) (int, int, int) { - // Small RCs owns ~0.5 of total number of pods, medium and big RCs ~0.25 each. 
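
createClients above goes further than an ordinary clientset factory: it builds a dedicated http.Transport per client so that connections are not shared between them. The sketch below only illustrates the per-client rate-limit settings (QPS/Burst), with a hypothetical kubeconfig path; it does not replicate the transport separation:

    package main

    import (
        "fmt"

        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/tools/clientcmd"
    )

    // createClients is a simplified stand-in for the deleted helper; the kubeconfig
    // path and the QPS/Burst values are assumptions for illustration.
    func createClients(kubeconfig string, n int) ([]kubernetes.Interface, error) {
        clients := make([]kubernetes.Interface, 0, n)
        for i := 0; i < n; i++ {
            cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
            if err != nil {
                return nil, err
            }
            // Raise the default client-side rate limits, as the deleted test did.
            cfg.QPS = 100
            cfg.Burst = 200
            c, err := kubernetes.NewForConfig(cfg)
            if err != nil {
                return nil, err
            }
            clients = append(clients, c)
        }
        return clients, nil
    }

    func main() {
        clients, err := createClients("/path/to/kubeconfig", 3)
        if err != nil {
            fmt.Println("error:", err)
            return
        }
        fmt.Println("created", len(clients), "clients")
    }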
- // For example for 3000 pods (100 nodes, 30 pods per node) there are: - // - 300 small RCs each 5 pods - // - 25 medium RCs each 30 pods - // - 3 big RCs each 250 pods - bigGroupCount := total / 4 / bigGroupSize - total -= bigGroupCount * bigGroupSize - mediumGroupCount := total / 3 / mediumGroupSize - total -= mediumGroupCount * mediumGroupSize - smallGroupCount := total / smallGroupSize - return smallGroupCount, mediumGroupCount, bigGroupCount -} - -func loadResourceConstraints() map[string]framework.ResourceConstraint { - constraints := make(map[string]framework.ResourceConstraint) - constraints["coredns"] = framework.ResourceConstraint{ - CPUConstraint: framework.NoCPUConstraint, - MemoryConstraint: 170 * (1024 * 1024), - } - constraints["kubedns"] = framework.ResourceConstraint{ - CPUConstraint: framework.NoCPUConstraint, - MemoryConstraint: 170 * (1024 * 1024), - } - return constraints -} - -func generateConfigs( - totalPods int, - image string, - command []string, - nss []*v1.Namespace, - kind schema.GroupKind, - secretsPerPod int, - configMapsPerPod int, -) ([]testutils.RunObjectConfig, []*testutils.SecretConfig, []*testutils.ConfigMapConfig) { - configs := make([]testutils.RunObjectConfig, 0) - secretConfigs := make([]*testutils.SecretConfig, 0) - configMapConfigs := make([]*testutils.ConfigMapConfig, 0) - - smallGroupCount, mediumGroupCount, bigGroupCount := computePodCounts(totalPods) - newConfigs, newSecretConfigs, newConfigMapConfigs := GenerateConfigsForGroup(nss, smallGroupName, smallGroupSize, smallGroupCount, image, command, kind, secretsPerPod, configMapsPerPod) - configs = append(configs, newConfigs...) - secretConfigs = append(secretConfigs, newSecretConfigs...) - configMapConfigs = append(configMapConfigs, newConfigMapConfigs...) - newConfigs, newSecretConfigs, newConfigMapConfigs = GenerateConfigsForGroup(nss, mediumGroupName, mediumGroupSize, mediumGroupCount, image, command, kind, secretsPerPod, configMapsPerPod) - configs = append(configs, newConfigs...) - secretConfigs = append(secretConfigs, newSecretConfigs...) - configMapConfigs = append(configMapConfigs, newConfigMapConfigs...) - newConfigs, newSecretConfigs, newConfigMapConfigs = GenerateConfigsForGroup(nss, bigGroupName, bigGroupSize, bigGroupCount, image, command, kind, secretsPerPod, configMapsPerPod) - configs = append(configs, newConfigs...) - secretConfigs = append(secretConfigs, newSecretConfigs...) - configMapConfigs = append(configMapConfigs, newConfigMapConfigs...) - - // Create a number of clients to better simulate real usecase - // where not everyone is using exactly the same client. 
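
The 50/25/25 split described in the comment above is easy to check standalone. A minimal sketch assuming the usual group sizes of 5, 30 and 250 pods per small, medium and big controller:

    package main

    import "fmt"

    const (
        smallGroupSize  = 5   // assumed, matching the test's constants
        mediumGroupSize = 30
        bigGroupSize    = 250
    )

    func computePodCounts(total int) (small, medium, big int) {
        big = total / 4 / bigGroupSize // ~25% of pods in big controllers
        total -= big * bigGroupSize
        medium = total / 3 / mediumGroupSize // another ~25% of the original total
        total -= medium * mediumGroupSize
        small = total / smallGroupSize // remaining ~50% in small controllers
        return
    }

    func main() {
        s, m, b := computePodCounts(3000)
        fmt.Println(s, m, b) // 300 25 3, matching the worked example in the comment
    }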
- rcsPerClient := 20 - clients, scalesClients, err := createClients((len(configs) + rcsPerClient - 1) / rcsPerClient) - framework.ExpectNoError(err) - - for i := 0; i < len(configs); i++ { - configs[i].SetClient(clients[i%len(clients)]) - configs[i].SetScalesClient(scalesClients[i%len(clients)]) - } - for i := 0; i < len(secretConfigs); i++ { - secretConfigs[i].Client = clients[i%len(clients)] - } - for i := 0; i < len(configMapConfigs); i++ { - configMapConfigs[i].Client = clients[i%len(clients)] - } - - return configs, secretConfigs, configMapConfigs -} - -// GenerateConfigsForGroup generates the configuration needed for a group -func GenerateConfigsForGroup( - nss []*v1.Namespace, - groupName string, - size, count int, - image string, - command []string, - kind schema.GroupKind, - secretsPerPod int, - configMapsPerPod int, -) ([]testutils.RunObjectConfig, []*testutils.SecretConfig, []*testutils.ConfigMapConfig) { - configs := make([]testutils.RunObjectConfig, 0, count) - secretConfigs := make([]*testutils.SecretConfig, 0, count*secretsPerPod) - configMapConfigs := make([]*testutils.ConfigMapConfig, 0, count*configMapsPerPod) - savedKind := kind - for i := 1; i <= count; i++ { - kind = savedKind - namespace := nss[i%len(nss)].Name - secretNames := make([]string, 0, secretsPerPod) - configMapNames := make([]string, 0, configMapsPerPod) - - for j := 0; j < secretsPerPod; j++ { - secretName := fmt.Sprintf("%v-%v-secret-%v", groupName, i, j) - secretConfigs = append(secretConfigs, &testutils.SecretConfig{ - Content: map[string]string{"foo": "bar"}, - Client: nil, // this will be overwritten later - Name: secretName, - Namespace: namespace, - LogFunc: framework.Logf, - }) - secretNames = append(secretNames, secretName) - } - - for j := 0; j < configMapsPerPod; j++ { - configMapName := fmt.Sprintf("%v-%v-configmap-%v", groupName, i, j) - configMapConfigs = append(configMapConfigs, &testutils.ConfigMapConfig{ - Content: map[string]string{"foo": "bar"}, - Client: nil, // this will be overwritten later - Name: configMapName, - Namespace: namespace, - LogFunc: framework.Logf, - }) - configMapNames = append(configMapNames, configMapName) - } - - baseConfig := &testutils.RCConfig{ - Client: nil, // this will be overwritten later - Name: groupName + "-" + strconv.Itoa(i), - Namespace: namespace, - Timeout: UnreadyNodeToleration, - Image: image, - Command: command, - Replicas: size, - CpuRequest: 10, // 0.01 core - MemRequest: 26214400, // 25MB - SecretNames: secretNames, - ConfigMapNames: configMapNames, - // Define a label to group every 2 RCs into one service. 
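
The "20 controllers per client" assignment in generateConfigs is plain ceiling division plus round-robin indexing. An illustrative standalone version, with an arbitrary config count:

    package main

    import "fmt"

    func main() {
        const rcsPerClient = 20
        numConfigs := 170 // arbitrary example value

        // Ceiling division: enough clients so that no client owns more than
        // rcsPerClient controller configs.
        numClients := (numConfigs + rcsPerClient - 1) / rcsPerClient // 9

        perClient := make([]int, numClients)
        for i := 0; i < numConfigs; i++ {
            perClient[i%numClients]++ // round-robin, as in generateConfigs
        }
        fmt.Println("clients:", numClients, "configs per client:", perClient)
    }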
- Labels: map[string]string{svcLabelKey: groupName + "-" + strconv.Itoa((i+1)/2)}, - Tolerations: []v1.Toleration{ - { - Key: "node.kubernetes.io/not-ready", - Operator: v1.TolerationOpExists, - Effect: v1.TaintEffectNoExecute, - TolerationSeconds: func(i int64) *int64 { return &i }(int64(UnreadyNodeToleration / time.Second)), - }, { - Key: "node.kubernetes.io/unreachable", - Operator: v1.TolerationOpExists, - Effect: v1.TaintEffectNoExecute, - TolerationSeconds: func(i int64) *int64 { return &i }(int64(UnreadyNodeToleration / time.Second)), - }, - }, - } - - if kind == randomKind { - kind = knownKinds[rand.Int()%len(knownKinds)] - } - - var config testutils.RunObjectConfig - switch kind { - case api.Kind("ReplicationController"): - config = baseConfig - case extensions.Kind("ReplicaSet"): - config = &testutils.ReplicaSetConfig{RCConfig: *baseConfig} - case extensions.Kind("Deployment"): - config = &testutils.DeploymentConfig{RCConfig: *baseConfig} - case batch.Kind("Job"): - config = &testutils.JobConfig{RCConfig: *baseConfig} - default: - framework.Failf("Unsupported kind for config creation: %v", kind) - } - configs = append(configs, config) - } - return configs, secretConfigs, configMapConfigs -} - -func generateServicesForConfigs(configs []testutils.RunObjectConfig) []*v1.Service { - services := make([]*v1.Service, 0) - currentSvcLabel := "" - for _, config := range configs { - svcLabel, found := config.GetLabelValue(svcLabelKey) - if !found || svcLabel == currentSvcLabel { - continue - } - currentSvcLabel = svcLabel - serviceName := config.GetName() + "-svc" - labels := map[string]string{ - "name": config.GetName(), - svcLabelKey: currentSvcLabel, - } - service := &v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: serviceName, - Namespace: config.GetNamespace(), - }, - Spec: v1.ServiceSpec{ - Selector: labels, - Ports: []v1.ServicePort{{ - Port: 80, - TargetPort: intstr.FromInt(80), - }}, - }, - } - services = append(services, service) - } - return services -} - -func sleepUpTo(d time.Duration) { - if d.Nanoseconds() > 0 { - time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds()))) - } -} - -func retryWithExponentialBackOff(initialDuration time.Duration, fn wait.ConditionFunc) error { - backoff := wait.Backoff{ - Duration: initialDuration, - Factor: 3, - Jitter: 0, - Steps: 6, - } - return wait.ExponentialBackoff(backoff, fn) -} - -func createAllResources(configs []testutils.RunObjectConfig, creatingTime time.Duration, testPhase *timer.Phase) { - defer testPhase.End() - var wg sync.WaitGroup - wg.Add(len(configs)) - for _, config := range configs { - go createResource(&wg, config, creatingTime) - } - wg.Wait() -} - -func createResource(wg *sync.WaitGroup, config testutils.RunObjectConfig, creatingTime time.Duration) { - defer ginkgo.GinkgoRecover() - defer wg.Done() - - sleepUpTo(creatingTime) - framework.ExpectNoError(config.Run(), fmt.Sprintf("creating %v %s", config.GetKind(), config.GetName())) -} - -func scaleAllResources(configs []testutils.RunObjectConfig, scalingTime time.Duration, testPhase *timer.Phase) { - defer testPhase.End() - var wg sync.WaitGroup - wg.Add(len(configs)) - for _, config := range configs { - go scaleResource(&wg, config, scalingTime) - } - wg.Wait() -} - -// Scales RC to a random size within [0.5*size, 1.5*size] and lists all the pods afterwards. -// Scaling happens always based on original size, not the current size. 
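
The load is paced with two primitives: a uniform random sleep that spreads goroutines across a time window, and wait.ExponentialBackoff for retrying flaky list calls. A small self-contained sketch of both; the "succeed on the third attempt" condition is purely illustrative:

    package main

    import (
        "fmt"
        "math/rand"
        "time"

        "k8s.io/apimachinery/pkg/util/wait"
    )

    // sleepUpTo sleeps for a uniformly random duration in [0, d), which spreads
    // concurrent create/scale/delete calls across the pacing window.
    func sleepUpTo(d time.Duration) {
        if d.Nanoseconds() > 0 {
            time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
        }
    }

    func main() {
        sleepUpTo(50 * time.Millisecond)

        attempts := 0
        backoff := wait.Backoff{Duration: 100 * time.Millisecond, Factor: 3, Jitter: 0, Steps: 6}
        err := wait.ExponentialBackoff(backoff, func() (bool, error) {
            attempts++
            // Pretend the operation succeeds on the third try.
            return attempts >= 3, nil
        })
        fmt.Println("attempts:", attempts, "err:", err) // attempts: 3 err: <nil>
    }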
-func scaleResource(wg *sync.WaitGroup, config testutils.RunObjectConfig, scalingTime time.Duration) { - defer ginkgo.GinkgoRecover() - defer wg.Done() - - sleepUpTo(scalingTime) - newSize := uint(rand.Intn(config.GetReplicas()) + config.GetReplicas()/2) - framework.ExpectNoError(framework.ScaleResource( - config.GetClient(), - config.GetScalesGetter(), - config.GetNamespace(), - config.GetName(), - newSize, - true, - config.GetKind(), - config.GetGroupVersionResource(), - ), - fmt.Sprintf("scaling %v %v", config.GetKind(), config.GetName())) - - selector := labels.SelectorFromSet(labels.Set(map[string]string{"name": config.GetName()})) - options := metav1.ListOptions{ - LabelSelector: selector.String(), - ResourceVersion: "0", - } - listResourcePodsFunc := func() (bool, error) { - _, err := config.GetClient().CoreV1().Pods(config.GetNamespace()).List(options) - if err == nil { - return true, nil - } - framework.Logf("Failed to list pods from %v %v due to: %v", config.GetKind(), config.GetName(), err) - if testutils.IsRetryableAPIError(err) { - return false, nil - } - return false, fmt.Errorf("Failed to list pods from %v %v with non-retriable error: %v", config.GetKind(), config.GetName(), err) - } - err := retryWithExponentialBackOff(100*time.Millisecond, listResourcePodsFunc) - framework.ExpectNoError(err) -} - -func deleteAllResources(configs []testutils.RunObjectConfig, deletingTime time.Duration, testPhase *timer.Phase) { - defer testPhase.End() - var wg sync.WaitGroup - wg.Add(len(configs)) - for _, config := range configs { - go deleteResource(&wg, config, deletingTime) - } - wg.Wait() -} - -func deleteResource(wg *sync.WaitGroup, config testutils.RunObjectConfig, deletingTime time.Duration) { - defer ginkgo.GinkgoRecover() - defer wg.Done() - - sleepUpTo(deletingTime) - framework.ExpectNoError(framework.DeleteResourceAndWaitForGC( - config.GetClient(), config.GetKind(), config.GetNamespace(), config.GetName()), - fmt.Sprintf("deleting %v %s", config.GetKind(), config.GetName())) -} - -// CreateNamespaces creates a namespace -func CreateNamespaces(f *framework.Framework, namespaceCount int, namePrefix string, testPhase *timer.Phase) ([]*v1.Namespace, error) { - defer testPhase.End() - namespaces := []*v1.Namespace{} - for i := 1; i <= namespaceCount; i++ { - namespace, err := f.CreateNamespace(fmt.Sprintf("%v-%d", namePrefix, i), nil) - if err != nil { - return []*v1.Namespace{}, err - } - namespaces = append(namespaces, namespace) - } - return namespaces, nil -} - -// CreateQuotas creates quotas -func CreateQuotas(f *framework.Framework, namespaces []*v1.Namespace, podCount int, testPhase *timer.Phase) error { - defer testPhase.End() - quotaTemplate := &v1.ResourceQuota{ - Spec: v1.ResourceQuotaSpec{ - Hard: v1.ResourceList{"pods": *resource.NewQuantity(int64(podCount), resource.DecimalSI)}, - }, - } - for _, ns := range namespaces { - quotaTemplate.Name = ns.Name + "-quota" - if err := testutils.CreateResourceQuotaWithRetries(f.ClientSet, ns.Name, quotaTemplate); err != nil { - return fmt.Errorf("Error creating quota: %v", err) - } - } - return nil -}
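
The random scale target used in scaleResource can also be reproduced in isolation: starting from an original size X, rand.Intn(X) + X/2 yields a value in [X/2, 3X/2), always relative to the original replica count rather than the current one. A tiny sketch with an assumed original size of 30 replicas:

    package main

    import (
        "fmt"
        "math/rand"
    )

    func main() {
        original := 30 // assumed original replica count
        // Uniform in [original/2, 3*original/2); the test converted this to uint
        // before passing it to framework.ScaleResource.
        newSize := rand.Intn(original) + original/2
        fmt.Printf("scaling from %d to %d replicas\n", original, newSize)
    }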