Merge pull request #24536 from jayunit100/LoggingSoak
Automatic merge from submit-queue

Logging soak. Implements #24427.
- Needs #24471 so that it doesn't clog test outputs for scale.
- Builds on the utils function added in support of #22869.

cc @timothysc @kubernetes/sig-testing
Commit 5374e48e60
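The new spec is tagged [Performance] [Slow] [Disruptive], labels that standard CI jobs commonly filter out via ginkgo skip regexes, so to exercise it on its own you would likely use the usual focus mechanism, for example something along the lines of go run hack/e2e.go -v --test --test_args="--ginkgo.focus=Logging\ssoak" (the exact runner invocation depends on your environment and is not prescribed by this PR).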
@@ -27,24 +27,6 @@ import (
    . "github.com/onsi/ginkgo"
)

// returns maxRetries, sleepDuration
func readConfig() (int, time.Duration) {
    // Read in configuration settings, reasonable defaults.
    retry := framework.TestContext.Cadvisor.MaxRetries
    if framework.TestContext.Cadvisor.MaxRetries == 0 {
        retry = 6
        framework.Logf("Overriding default retry value of zero to %d", retry)
    }

    sleepDurationMS := framework.TestContext.Cadvisor.SleepDurationMS
    if sleepDurationMS == 0 {
        sleepDurationMS = 10000
        framework.Logf("Overriding default milliseconds value of zero to %d", sleepDurationMS)
    }

    return retry, time.Duration(sleepDurationMS) * time.Millisecond
}

var _ = framework.KubeDescribe("Cadvisor", func() {

    f := framework.NewDefaultFramework("cadvisor")
@@ -60,6 +42,25 @@ func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) {
    nodeList, err := c.Nodes().List(api.ListOptions{})
    framework.ExpectNoError(err)
    var errors []error

    // returns maxRetries, sleepDuration
    readConfig := func() (int, time.Duration) {
        // Read in configuration settings, reasonable defaults.
        retry := framework.TestContext.Cadvisor.MaxRetries
        if framework.TestContext.Cadvisor.MaxRetries == 0 {
            retry = 6
            framework.Logf("Overriding default retry value of zero to %d", retry)
        }

        sleepDurationMS := framework.TestContext.Cadvisor.SleepDurationMS
        if sleepDurationMS == 0 {
            sleepDurationMS = 10000
            framework.Logf("Overriding default milliseconds value of zero to %d", sleepDurationMS)
        }

        return retry, time.Duration(sleepDurationMS) * time.Millisecond
    }

    maxRetries, sleepDuration := readConfig()
    for {
        errors = []error{}
@@ -79,6 +79,7 @@ type TestContextType struct {
    NodeTestContextType

    // Viper-only parameters. These will in time replace all flags.

    // Example: Create a file 'e2e.json' with the following:
    // "Cadvisor":{
    //     "MaxRetries":"6"
@@ -89,6 +90,11 @@ type TestContextType struct {
        MaxRetries      int
        SleepDurationMS int
    }

    LoggingSoak struct {
        Scale                    int
        MilliSecondsBetweenWaves int
    }
}

// NodeTestContextType is part of TestContextType, it is shared by all node e2e test.
@@ -205,7 +211,6 @@ func RegisterNodeFlags() {

// Enable viper configuration management of flags.
func ViperizeFlags() {

    // TODO @jayunit100: Maybe a more elegant viper-flag integration for the future?
    // For now, we layer it on top, because 'flag' deps of 'go test' make pflag wrappers
    // fragile, seeming to force 'flag' to have deep awareness of pflag params.
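Putting the two viper structs above together, a complete e2e.json could look like the sketch below. This is only illustrative: the keys mirror the TestContextType fields shown in this diff, while the values are arbitrary examples rather than anything prescribed by the PR (the in-code fallbacks, applied when a field is left at zero, are MaxRetries=6, SleepDurationMS=10000, Scale=1, MilliSecondsBetweenWaves=5000).

    {
        "Cadvisor": {
            "MaxRetries": "6",
            "SleepDurationMS": "10000"
        },
        "LoggingSoak": {
            "Scale": "2",
            "MilliSecondsBetweenWaves": "5000"
        }
    }

The string-valued numbers follow the style of the existing "MaxRetries":"6" comment; whether viper coerces them into the int fields depends on its weak-typing behavior, so plain integers may be the safer choice in practice.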
test/e2e/logging_soak.go (new file, 136 lines)
@@ -0,0 +1,136 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
    "fmt"
    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/test/e2e/framework"
    "strconv"
    "strings"
    "sync"
    "time"
)

var _ = framework.KubeDescribe("Logging soak [Performance] [Slow] [Disruptive]", func() {
|
||||
|
||||
f := framework.NewDefaultFramework("logging-soak")
|
||||
|
||||
// Not a global constant (irrelevant outside this test), also not a parameter (if you want more logs, use --scale=).
|
||||
kbRateInSeconds := 1 * time.Second
|
||||
totalLogTime := 2 * time.Minute
|
||||
|
||||
// This test is designed to run and confirm that logs are being generated at a large scale, and that they can be grabbed by the kubelet.
|
||||
// By running it repeatedly in the background, you can simulate large collections of chatty containers.
|
||||
// This can expose problems in your docker configuration (logging), log searching infrastructure, to tune deployments to match high load
|
||||
// scenarios. TODO jayunit100 add this to the kube CI in a follow on infra patch.
|
||||
|
||||
// Returns scale (how many waves of pods).
|
||||
// Returns wave interval (how many seconds to wait before dumping the next wave of pods).
|
||||
readConfig := func() (int, time.Duration) {
|
||||
// Read in configuration settings, reasonable defaults.
|
||||
scale := framework.TestContext.LoggingSoak.Scale
|
||||
if framework.TestContext.LoggingSoak.Scale == 0 {
|
||||
scale = 1
|
||||
framework.Logf("Overriding default scale value of zero to %d", scale)
|
||||
}
|
||||
|
||||
milliSecondsBetweenWaves := framework.TestContext.LoggingSoak.MilliSecondsBetweenWaves
|
||||
if milliSecondsBetweenWaves == 0 {
|
||||
milliSecondsBetweenWaves = 5000
|
||||
framework.Logf("Overriding default milliseconds value of zero to %d", milliSecondsBetweenWaves)
|
||||
}
|
||||
|
||||
return scale, time.Duration(milliSecondsBetweenWaves) * time.Millisecond
|
||||
}
|
||||
|
||||
scale, millisecondsBetweenWaves := readConfig()
|
||||
It(fmt.Sprintf("should survive logging 1KB every %v seconds, for a duration of %v, scaling up to %v pods per node", kbRateInSeconds, totalLogTime, scale), func() {
|
||||
defer GinkgoRecover()
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(scale)
|
||||
for i := 0; i < scale; i++ {
|
||||
go func() {
|
||||
wave := fmt.Sprintf("wave%v", strconv.Itoa(i))
|
||||
framework.Logf("Starting logging soak, wave = %v", wave)
|
||||
RunLogPodsWithSleepOf(f, kbRateInSeconds, wave, totalLogTime)
|
||||
framework.Logf("Completed logging soak, wave %v", i)
|
||||
wg.Done()
|
||||
}()
|
||||
// Niceness.
|
||||
time.Sleep(millisecondsBetweenWaves)
|
||||
}
|
||||
framework.Logf("Waiting on all %v logging soak waves to complete", scale)
|
||||
wg.Wait()
|
||||
})
|
||||
})
|
||||
|
||||
// RunLogPodsWithSleepOf creates a pod on every node which logs continuously (with "sleep" pauses), and verifies that the log string
// was produced in each and every pod at least once. The final arg is the timeout for the test to verify all the pods got logs.
func RunLogPodsWithSleepOf(f *framework.Framework, sleep time.Duration, podname string, timeout time.Duration) {

    nodes := framework.GetReadySchedulableNodesOrDie(f.Client)
    totalPods := len(nodes.Items)
    Expect(totalPods).NotTo(Equal(0))

    kilobyte := strings.Repeat("logs-123", 128) // 8*128=1024 = 1KB of text.

    appName := "logging-soak" + podname
    podLabels := f.CreatePodsPerNodeForSimpleApp(
        appName,
        func(n api.Node) api.PodSpec {
            return api.PodSpec{
                Containers: []api.Container{{
                    Name:  "logging-soak",
                    Image: "gcr.io/google_containers/busybox:1.24",
                    Args: []string{
                        "/bin/sh",
                        "-c",
                        fmt.Sprintf("while true ; do echo %v ; sleep %v; done", kilobyte, sleep.Seconds()),
                    },
                }},
                NodeName:      n.Name,
                RestartPolicy: api.RestartPolicyAlways,
            }
        },
        totalPods,
    )

    logSoakVerification := f.NewClusterVerification(
        framework.PodStateVerification{
            Selectors:   podLabels,
            ValidPhases: []api.PodPhase{api.PodRunning, api.PodSucceeded},
            // We don't validate total log data, since there is no guarantee all logs will be stored forever.
            // Instead, we just validate that some logs are being created on stdout.
            Verify: func(p api.Pod) (bool, error) {
                s, err := framework.LookForStringInLog(f.Namespace.Name, p.Name, "logging-soak", "logs-123", 1*time.Second)
                return s != "", err
            },
        },
    )

    largeClusterForgiveness := time.Duration(len(nodes.Items)/5) * time.Second // e.g. a 100-node cluster gets an extra 20 seconds to complete.
    pods, err := logSoakVerification.WaitFor(totalPods, timeout+largeClusterForgiveness)

    if err != nil {
        framework.Failf("Error in wait... %v", err)
    } else if len(pods) < totalPods {
        framework.Failf("Only got %v out of %v", len(pods), totalPods)
    }
}