diff --git a/test/e2e/cadvisor.go b/test/e2e/cadvisor.go
index c369ea52466..dc8dffec2ef 100644
--- a/test/e2e/cadvisor.go
+++ b/test/e2e/cadvisor.go
@@ -27,24 +27,6 @@ import (
 	. "github.com/onsi/ginkgo"
 )
 
-// returns maxRetries, sleepDuration
-func readConfig() (int, time.Duration) {
-	// Read in configuration settings, reasonable defaults.
-	retry := framework.TestContext.Cadvisor.MaxRetries
-	if framework.TestContext.Cadvisor.MaxRetries == 0 {
-		retry = 6
-		framework.Logf("Overriding default retry value of zero to %d", retry)
-	}
-
-	sleepDurationMS := framework.TestContext.Cadvisor.SleepDurationMS
-	if sleepDurationMS == 0 {
-		sleepDurationMS = 10000
-		framework.Logf("Overriding default milliseconds value of zero to %d", sleepDurationMS)
-	}
-
-	return retry, time.Duration(sleepDurationMS) * time.Millisecond
-}
-
 var _ = framework.KubeDescribe("Cadvisor", func() {
 
 	f := framework.NewDefaultFramework("cadvisor")
@@ -60,6 +42,25 @@ func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) {
 	nodeList, err := c.Nodes().List(api.ListOptions{})
 	framework.ExpectNoError(err)
 	var errors []error
+
+	// returns maxRetries, sleepDuration
+	readConfig := func() (int, time.Duration) {
+		// Read in configuration settings, reasonable defaults.
+		retry := framework.TestContext.Cadvisor.MaxRetries
+		if framework.TestContext.Cadvisor.MaxRetries == 0 {
+			retry = 6
+			framework.Logf("Overriding default retry value of zero to %d", retry)
+		}
+
+		sleepDurationMS := framework.TestContext.Cadvisor.SleepDurationMS
+		if sleepDurationMS == 0 {
+			sleepDurationMS = 10000
+			framework.Logf("Overriding default milliseconds value of zero to %d", sleepDurationMS)
+		}
+
+		return retry, time.Duration(sleepDurationMS) * time.Millisecond
+	}
+
 	maxRetries, sleepDuration := readConfig()
 	for {
 		errors = []error{}
diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go
index f463c879a09..460ac7572e3 100644
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@@ -79,6 +79,7 @@ type TestContextType struct {
 	NodeTestContextType
 
 	// Viper-only parameters. These will in time replace all flags.
+
 	// Example: Create a file 'e2e.json' with the following:
 	//	"Cadvisor":{
 	//		"MaxRetries":"6"
@@ -89,6 +90,11 @@ type TestContextType struct {
 		MaxRetries      int
 		SleepDurationMS int
 	}
+
+	LoggingSoak struct {
+		Scale                    int
+		MilliSecondsBetweenWaves int
+	}
 }
 
 // NodeTestContextType is part of TestContextType, it is shared by all node e2e test.
@@ -205,7 +211,6 @@ func RegisterNodeFlags() {
 
 // Enable viper configuration management of flags.
 func ViperizeFlags() {
-
 	// TODO @jayunit100: Maybe a more elegant viper-flag integration for the future?
 	// For now, we layer it on top, because 'flag' deps of 'go test' make pflag wrappers
 	// fragile, seeming to force 'flag' to have deep awareness of pflag params.
diff --git a/test/e2e/logging_soak.go b/test/e2e/logging_soak.go
new file mode 100644
index 00000000000..c0f7d6943f4
--- /dev/null
+++ b/test/e2e/logging_soak.go
@@ -0,0 +1,136 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"fmt"
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/test/e2e/framework"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+var _ = framework.KubeDescribe("Logging soak [Performance] [Slow] [Disruptive]", func() {
+
+	f := framework.NewDefaultFramework("logging-soak")
+
+	// Not a global constant (it is irrelevant outside this test), and not a parameter (if you want more logs, use --scale).
+	kbRateInSeconds := 1 * time.Second
+	totalLogTime := 2 * time.Minute
+
+	// This test confirms that logs are generated at a large scale, and that they can be grabbed by the kubelet.
+	// By running it repeatedly in the background, you can simulate large collections of chatty containers.
+	// This can expose problems in your docker logging configuration or log-searching infrastructure, and can be
+	// used to tune deployments for high-load scenarios. TODO jayunit100: add this to the kube CI in a follow-on infra patch.
+
+	// Returns scale (how many waves of pods to launch) and
+	// the wave interval (how long to wait before dumping the next wave of pods).
+	readConfig := func() (int, time.Duration) {
+		// Read in configuration settings, reasonable defaults.
+		scale := framework.TestContext.LoggingSoak.Scale
+		if framework.TestContext.LoggingSoak.Scale == 0 {
+			scale = 1
+			framework.Logf("Overriding default scale value of zero to %d", scale)
+		}
+
+		milliSecondsBetweenWaves := framework.TestContext.LoggingSoak.MilliSecondsBetweenWaves
+		if milliSecondsBetweenWaves == 0 {
+			milliSecondsBetweenWaves = 5000
+			framework.Logf("Overriding default milliseconds value of zero to %d", milliSecondsBetweenWaves)
+		}
+
+		return scale, time.Duration(milliSecondsBetweenWaves) * time.Millisecond
+	}
+
+	scale, millisecondsBetweenWaves := readConfig()
+	It(fmt.Sprintf("should survive logging 1KB every %v, for a duration of %v, scaling up to %v pods per node", kbRateInSeconds, totalLogTime, scale), func() {
+		defer GinkgoRecover()
+		var wg sync.WaitGroup
+		wg.Add(scale)
+		for i := 0; i < scale; i++ {
+			go func(i int) {
+				wave := fmt.Sprintf("wave%v", strconv.Itoa(i))
+				framework.Logf("Starting logging soak, wave = %v", wave)
+				RunLogPodsWithSleepOf(f, kbRateInSeconds, wave, totalLogTime)
+				framework.Logf("Completed logging soak, wave %v", i)
+				wg.Done()
+			}(i)
+			// Niceness.
+			time.Sleep(millisecondsBetweenWaves)
+		}
+		framework.Logf("Waiting on all %v logging soak waves to complete", scale)
+		wg.Wait()
+	})
+})
+
+// RunLogPodsWithSleepOf creates a pod on every node, logs continuously (with "sleep" pauses), and verifies that the log string
+// was produced in each and every pod at least once. The final arg is the timeout for the test to verify all the pods got logs.
+func RunLogPodsWithSleepOf(f *framework.Framework, sleep time.Duration, podname string, timeout time.Duration) {
+
+	nodes := framework.GetReadySchedulableNodesOrDie(f.Client)
+	totalPods := len(nodes.Items)
+	Expect(totalPods).NotTo(Equal(0))
+
+	kilobyte := strings.Repeat("logs-123", 128) // 8*128 = 1024 = 1KB of text.
+
+	appName := "logging-soak" + podname
+	podLabels := f.CreatePodsPerNodeForSimpleApp(
+		appName,
+		func(n api.Node) api.PodSpec {
+			return api.PodSpec{
+				Containers: []api.Container{{
+					Name:  "logging-soak",
+					Image: "gcr.io/google_containers/busybox:1.24",
+					Args: []string{
+						"/bin/sh",
+						"-c",
+						fmt.Sprintf("while true ; do echo %v ; sleep %v; done", kilobyte, sleep.Seconds()),
+					},
+				}},
+				NodeName:      n.Name,
+				RestartPolicy: api.RestartPolicyAlways,
+			}
+		},
+		totalPods,
+	)
+
+	logSoakVerification := f.NewClusterVerification(
+		framework.PodStateVerification{
+			Selectors:   podLabels,
+			ValidPhases: []api.PodPhase{api.PodRunning, api.PodSucceeded},
+			// We don't validate total log data, since there is no guarantee that all logs will be stored forever.
+			// Instead, we just validate that some logs are being written to stdout.
+			Verify: func(p api.Pod) (bool, error) {
+				s, err := framework.LookForStringInLog(f.Namespace.Name, p.Name, "logging-soak", "logs-123", 1*time.Second)
+				return s != "", err
+			},
+		},
+	)
+
+	largeClusterForgiveness := time.Duration(len(nodes.Items)/5) * time.Second // i.e. a 100-node cluster gets an extra 20 seconds to complete.
+	pods, err := logSoakVerification.WaitFor(totalPods, timeout+largeClusterForgiveness)
+
+	if err != nil {
+		framework.Failf("Error in wait... %v", err)
+	} else if len(pods) < totalPods {
+		framework.Failf("Only got %v out of %v", len(pods), totalPods)
+	}
+}
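
For reference, the new LoggingSoak fields are read through the same viper path as the existing Cadvisor block, so a local 'e2e.json' in the style of the comment in test_context.go could set both. This is only an illustrative sketch, not part of the patch: the field names come from TestContextType above, but the values are made up, and the in-tree comment quotes its numbers as strings, so the exact form viper expects should be verified before relying on it.

    {
      "Cadvisor": {
        "MaxRetries": 6,
        "SleepDurationMS": 10000
      },
      "LoggingSoak": {
        "Scale": 2,
        "MilliSecondsBetweenWaves": 5000
      }
    }

If the file is absent or the fields are left at zero, readConfig() in logging_soak.go falls back to a single wave and a 5000ms wave interval, emitting the "Overriding default ..." log messages shown above.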