diff --git a/test/integration/framework/logger.go b/test/integration/framework/logger.go
new file mode 100644
index 00000000000..68aca0d2bd6
--- /dev/null
+++ b/test/integration/framework/logger.go
@@ -0,0 +1,89 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package framework
+
+import (
+	"flag"
+	"io"
+	"testing"
+
+	"k8s.io/klog/v2"
+)
+
+// RedirectKlog modifies the global klog logger so that it writes via the given
+// writer. This only works when different tests run sequentially.
+//
+// The returned cleanup function restores the previous state. Beware that it is
+// not thread-safe: all goroutines which call klog must have been stopped.
+func RedirectKlog(tb testing.TB, output io.Writer) func() {
+	expectNoError := func(err error) {
+		if err != nil {
+			tb.Fatalf("unexpected error: %v", err)
+		}
+	}
+
+	state := klog.CaptureState()
+	defer func() {
+		if r := recover(); r != nil {
+			state.Restore()
+			panic(r)
+		}
+	}()
+	var fs flag.FlagSet
+	klog.InitFlags(&fs)
+	expectNoError(fs.Set("log_file", "/dev/null"))
+	expectNoError(fs.Set("logtostderr", "false"))
+	expectNoError(fs.Set("alsologtostderr", "false"))
+	expectNoError(fs.Set("stderrthreshold", "10"))
+	expectNoError(fs.Set("one_output", "true"))
+	klog.SetOutput(output)
+	return state.Restore
+}
+
+// NewTBWriter creates an io.Writer which turns each write into a tb.Log call.
+//
+// Note that no attempt is made to determine the actual call site because
+// our caller doesn't know about the TB instance and thus cannot mark itself
+// as helper. Therefore the code here doesn't do it either and thus shows up
+// as the call site in the testing output. To avoid that, contextual logging
+// and ktesting have to be used.
+func NewTBWriter(tb testing.TB) io.Writer {
+	return testingWriter{TB: tb}
+}
+
+type testingWriter struct {
+	testing.TB
+}
+
+func (tw testingWriter) Write(data []byte) (int, error) {
+	logLen := len(data)
+	if logLen == 0 {
+		return 0, nil
+	}
+	// Trim the trailing line break; Log will add it back.
+	if data[logLen-1] == '\n' {
+		logLen--
+	}
+	// We could call TB.Helper here, but that doesn't really help because
+	// then our caller (code in klog) would be reported instead, which isn't
+	// right either. klog would have to call TB.Helper itself, but doesn't
+	// know about the TB instance.
+	tw.Log(string(data[:logLen]))
+	return len(data), nil
+}
+
+var _ io.Writer = testingWriter{}
diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md
index a9b38a8450f..a10eec0bf49 100644
--- a/test/integration/scheduler_perf/README.md
+++ b/test/integration/scheduler_perf/README.md
@@ -77,3 +77,15 @@ The configuration file under `config/performance-config.yaml` contains a default
 various scenarios. In case you want to add your own, you can extend the list with new templates.
 It's also possible to extend `op` data type, respectively its underlying data types to extend
 configuration of possible test cases.
+
+### Logging
+
+The default verbosity is 2 (the recommended value for production). -v can be
+used to change this. The log format can be changed with
+-logging-format=text|json. The default is to write into a log file (when using
+the text format) or to stderr (when using JSON). Together these options allow
+simulating different real production configurations and comparing their
+performance.
+
+During interactive debugging sessions it is possible to enable per-test output
+via -use-testing-log.
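Not part of the patch itself: a minimal sketch of how the two new helpers are meant to be combined in a sequentially running test. The package and test names are hypothetical, and the klog message is arbitrary.

```go
package example

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/kubernetes/test/integration/framework"
)

// TestRedirectedKlog (hypothetical) forwards everything written through the
// global klog logger to t.Log for the duration of the test, then restores
// the previous klog configuration.
func TestRedirectedKlog(t *testing.T) {
	restore := framework.RedirectKlog(t, framework.NewTBWriter(t))
	defer restore()

	// Reported with the logger.go call site, not this line (see the
	// NewTBWriter comment above).
	klog.InfoS("hello from klog", "test", t.Name())
}
```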
diff --git a/test/integration/scheduler_perf/main_test.go b/test/integration/scheduler_perf/main_test.go
index 09f3a87136f..2019a255293 100644
--- a/test/integration/scheduler_perf/main_test.go
+++ b/test/integration/scheduler_perf/main_test.go
@@ -18,17 +18,55 @@ package benchmark
 
 import (
 	"flag"
+	"fmt"
+	"os"
+	"strings"
 	"testing"
 
-	"k8s.io/klog/v2/ktesting"
-	"k8s.io/kubernetes/test/integration/framework"
+	"k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/component-base/featuregate"
+	"k8s.io/component-base/logs"
+	logsapi "k8s.io/component-base/logs/api/v1"
+	_ "k8s.io/component-base/logs/json/register"
+	"k8s.io/kubernetes/test/utils/ktesting"
 )
 
 func TestMain(m *testing.M) {
 	// Run with -v=2, this is the default log level in production.
-	ktesting.DefaultConfig = ktesting.NewConfig(ktesting.Verbosity(2))
-	ktesting.DefaultConfig.AddFlags(flag.CommandLine)
+	ktesting.SetDefaultVerbosity(2)
+
+	// test/integration/framework/flags.go unconditionally initializes the
+	// logging flags. That's correct for most tests, but in the
+	// scheduler_perf test we want more control over the flags, therefore
+	// we strip them out here.
+	var fs flag.FlagSet
+	flag.CommandLine.VisitAll(func(f *flag.Flag) {
+		switch f.Name {
+		case "log-flush-frequency", "v", "vmodule":
+			// These will be added back below, don't copy them.
+		default:
+			fs.Var(f.Value, f.Name, f.Usage)
+		}
+	})
+	flag.CommandLine = &fs
+
+	featureGate := featuregate.NewFeatureGate()
+	runtime.Must(logsapi.AddFeatureGates(featureGate))
+	flag.Var(featureGate, "feature-gate",
+		"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
+			"Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
+
+	c := logsapi.NewLoggingConfiguration()
+
+	// This would fail if we hadn't removed the logging flags above.
+	logsapi.AddGoFlags(c, flag.CommandLine)
 
 	flag.Parse()
-	framework.EtcdMain(m.Run)
+
+	logs.InitLogs()
+	if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", err)
+		os.Exit(1)
+	}
+
+	m.Run()
 }
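For reference, hypothetical invocations that exercise the flags wired up in TestMain above. The -bench filter and verbosity are illustrative; -args is used so that the remaining flags reach the test binary unmodified.

```shell
# Production-like run: v=2 (the default set in TestMain), JSON logs to stderr.
go test ./test/integration/scheduler_perf -run=XXX -bench=BenchmarkPerfScheduling \
    -benchtime=1ns -args -logging-format=json

# Interactive debugging: higher verbosity, output routed through testing.TB.Log.
go test ./test/integration/scheduler_perf -run=XXX -bench=BenchmarkPerfScheduling \
    -benchtime=1ns -args -v=5 -use-testing-log
```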
diff --git a/test/integration/scheduler_perf/scheduler_perf_test.go b/test/integration/scheduler_perf/scheduler_perf_test.go
index 2eb1b29c76f..18f018a46bf 100644
--- a/test/integration/scheduler_perf/scheduler_perf_test.go
+++ b/test/integration/scheduler_perf/scheduler_perf_test.go
@@ -19,9 +19,13 @@ package benchmark
 import (
 	"context"
 	"encoding/json"
+	"flag"
 	"fmt"
+	"io"
+	"io/ioutil"
 	"math"
 	"os"
+	"path"
 	"strings"
 	"sync"
 	"testing"
@@ -48,6 +52,7 @@ import (
 	"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
 	"k8s.io/kubernetes/test/integration/framework"
 	testutils "k8s.io/kubernetes/test/utils"
+	"k8s.io/kubernetes/test/utils/ktesting"
 	"sigs.k8s.io/yaml"
 )
 
@@ -570,6 +575,39 @@ func (so sleepOp) patchParams(_ *workload) (realOp, error) {
 	return &so, nil
 }
 
+var useTestingLog = flag.Bool("use-testing-log", false, "Write log entries with testing.TB.Log. This is more suitable for unit testing and debugging, but less realistic in real benchmarks.")
+
+func initTestOutput(tb testing.TB) io.Writer {
+	var output io.Writer
+	if *useTestingLog {
+		output = framework.NewTBWriter(tb)
+	} else {
+		tmpDir := tb.TempDir()
+		logfileName := path.Join(tmpDir, "output.log")
+		fileOutput, err := os.Create(logfileName)
+		if err != nil {
+			tb.Fatalf("create log file: %v", err)
+		}
+		output = fileOutput
+
+		tb.Cleanup(func() {
+			// Dump the log output when the test is done. The user
+			// can decide how much of it will be visible in case of
+			// success: then "go test" truncates, "go test -v"
+			// doesn't. All of it will be shown for a failure.
+			if err := fileOutput.Close(); err != nil {
+				tb.Fatalf("close log file: %v", err)
+			}
+			log, err := ioutil.ReadFile(logfileName)
+			if err != nil {
+				tb.Fatalf("read log file: %v", err)
+			}
+			tb.Logf("full log output:\n%s", string(log))
+		})
+	}
+	return output
+}
+
 func BenchmarkPerfScheduling(b *testing.B) {
 	testCases, err := getTestCases(configFile)
 	if err != nil {
@@ -579,13 +617,45 @@
 		b.Fatal(err)
 	}
 
+	output := initTestOutput(b)
+
+	// Because we run sequentially, it is possible to change the global
+	// klog logger and redirect log output. Quite a lot of code still uses
+	// it instead of supporting contextual logging.
+	//
+	// Because we leak one goroutine which calls klog, we cannot restore
+	// the previous state.
+	_ = framework.RedirectKlog(b, output)
+
 	dataItems := DataItems{Version: "v1"}
 	for _, tc := range testCases {
 		b.Run(tc.Name, func(b *testing.B) {
 			for _, w := range tc.Workloads {
 				b.Run(w.Name, func(b *testing.B) {
+					// Ensure that there are no leaked
+					// goroutines. They could influence
+					// performance of the next benchmark.
+					// This must happen *after* RedirectKlog
+					// because then during cleanup, the
+					// test will wait for goroutines to
+					// quit *before* restoring klog settings.
+					framework.GoleakCheck(b)
+
+					ctx := context.Background()
+
+					if *useTestingLog {
+						// In addition to redirecting klog
+						// output, also enable contextual
+						// logging.
+						_, ctx = ktesting.NewTestContext(b)
+					}
+
+					// Now that we are ready to run, start
+					// etcd.
+					framework.StartEtcd(b, output)
+
 					// 30 minutes should be plenty enough even for the 5000-node tests.
-					ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Minute)
+					ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
 					b.Cleanup(cancel)
 					for feature, flag := range tc.FeatureGates {
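The ordering argument in the GoleakCheck comment above rests on testing's cleanup semantics: Cleanup callbacks run in last-in, first-out order, so a check registered later runs earlier during teardown. A standalone sketch of just that mechanism (test name hypothetical, log messages arbitrary):

```go
package example

import "testing"

// TestCleanupOrder demonstrates the LIFO ordering that the benchmark relies
// on: the callback registered last runs first, so a goroutine-leak check
// registered after a klog redirect waits for goroutines before any
// klog-related teardown runs.
func TestCleanupOrder(t *testing.T) {
	t.Cleanup(func() { t.Log("runs second: e.g. restore klog settings") })
	t.Cleanup(func() { t.Log("runs first: e.g. wait for leaked goroutines") })
}
```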