scheduler_perf: add logging flags

This enables testing of different real production configurations (JSON
vs. text, different log levels, contextual logging).
Patrick Ohly 2023-02-27 20:36:02 +01:00
parent 00d1459530
commit 961129c5f1
4 changed files with 215 additions and 6 deletions


@@ -0,0 +1,89 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package framework
import (
"flag"
"io"
"testing"
"k8s.io/klog/v2"
)
// RedirectKlog modifies the global klog logger so that it writes via the given
// writer. This only works when different tests run sequentially.
//
// The returned cleanup function restores the previous state. Beware that it is
// not thread-safe: all goroutines which call klog must have been stopped before
// it is called.
func RedirectKlog(tb testing.TB, output io.Writer) func() {
expectNoError := func(err error) {
if err != nil {
tb.Fatalf("unexpected error: %v", err)
}
}
state := klog.CaptureState()
defer func() {
if r := recover(); r != nil {
state.Restore()
panic(r)
}
}()
var fs flag.FlagSet
klog.InitFlags(&fs)
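// Route all klog output through a single stream and keep it off stderr;
// klog.SetOutput below then redirects that stream to the given writer.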
expectNoError(fs.Set("log_file", "/dev/null"))
expectNoError(fs.Set("logtostderr", "false"))
expectNoError(fs.Set("alsologtostderr", "false"))
expectNoError(fs.Set("stderrthreshold", "10"))
expectNoError(fs.Set("one_output", "true"))
klog.SetOutput(output)
return state.Restore
}
// NewTBWriter creates an io.Writer which turns each write into a tb.Log call.
//
// Note that no attempt is made to determine the actual call site because
// our caller doesn't know about the TB instance and thus cannot mark itself
// as a helper. Therefore the code here doesn't do it either and thus shows up
// as the call site in the testing output. To avoid that, contextual logging
// and ktesting have to be used.
func NewTBWriter(tb testing.TB) io.Writer {
return testingWriter{TB: tb}
}
type testingWriter struct {
testing.TB
}
func (tw testingWriter) Write(data []byte) (int, error) {
logLen := len(data)
if logLen == 0 {
return 0, nil
}
// Trim the trailing line break; Log will add its own.
if data[logLen-1] == '\n' {
logLen--
}
// We could call TB.Helper here, but that doesn't really help because
// then our caller (code in klog) will be reported instead, which isn't
// right either. klog would have to call TB.Helper itself, but doesn't
// know about the TB instance.
tw.Log(string(data[:logLen]))
return len(data), nil
}
var _ io.Writer = testingWriter{}
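Together, these two helpers let a sequentially running test capture everything that still goes through the global klog logger in its own test log. A minimal usage sketch (a hypothetical test, not part of this commit; the scheduler_perf benchmark below wires the helpers up the same way when -use-testing-log is set):

```go
package framework_test

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/kubernetes/test/integration/framework"
)

func TestWithRedirectedKlog(t *testing.T) {
	// Route the global klog output to t.Log for the duration of this test.
	restore := framework.RedirectKlog(t, framework.NewTBWriter(t))
	defer restore()

	// Code under test that still logs via the global klog logger now shows
	// up in the per-test output.
	klog.InfoS("scheduling finished", "pods", 100)
}
```

As the doc comment above warns, calling the returned restore function is only safe once all goroutines that log through klog have stopped; the benchmark below therefore discards it.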


@@ -77,3 +77,15 @@ The configuration file under `config/performance-config.yaml` contains a default
various scenarios. In case you want to add your own, you can extend the list with new templates.
It's also possible to extend the `op` data type, or its underlying data types,
to extend the configuration of possible test cases.
### Logging

The default verbosity is 2 (the recommended value for production). `-v` can be
used to change this. The log format can be changed with
`-logging-format=text|json`. The default is to write into a log file (when using
the text format) or to stderr (when using JSON). Together these options allow
simulating different real production configurations and comparing their
performance. For example, `-v=5 -logging-format=json` combines higher verbosity
with structured JSON output.

During interactive debugging sessions it is possible to enable per-test output
via `-use-testing-log`.


@@ -18,17 +18,55 @@ package benchmark
import (
"flag"
"fmt"
"os"
"strings"
"testing"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/test/integration/framework"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/component-base/featuregate"
"k8s.io/component-base/logs"
logsapi "k8s.io/component-base/logs/api/v1"
_ "k8s.io/component-base/logs/json/register"
"k8s.io/kubernetes/test/utils/ktesting"
)
func TestMain(m *testing.M) {
// Run with -v=2, the default log level in production.
ktesting.SetDefaultVerbosity(2)
// test/integration/framework/flags.go unconditionally initializes the
// logging flags. That's correct for most tests, but in the
// scheduler_perf test we want more control over the flags, therefore
// we strip them out here.
var fs flag.FlagSet
flag.CommandLine.VisitAll(func(f *flag.Flag) {
switch f.Name {
case "log-flush-frequency", "v", "vmodule":
// These will be added below ourselves, don't copy.
default:
fs.Var(f.Value, f.Name, f.Usage)
}
})
flag.CommandLine = &fs
featureGate := featuregate.NewFeatureGate()
runtime.Must(logsapi.AddFeatureGates(featureGate))
flag.Var(featureGate, "feature-gate",
"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
"Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
c := logsapi.NewLoggingConfiguration()
// This would fail if we hadn't removed the logging flags above.
logsapi.AddGoFlags(c, flag.CommandLine)
flag.Parse()
logs.InitLogs()
if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
m.Run()
}
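The flag surgery at the top of TestMain is independent of the logging packages and can be illustrated in isolation. A standalone sketch (hypothetical, for illustration only) of the same technique: copy every flag that other packages already registered on flag.CommandLine into a fresh FlagSet, skip the ones that will be registered again later, then swap the new FlagSet in:

```go
package main

import (
	"flag"
	"fmt"
)

func main() {
	// Pretend that imported packages already registered these on the
	// global flag.CommandLine.
	flag.Bool("v", false, "verbosity flag registered elsewhere")
	flag.String("kube-config", "", "an unrelated test flag that must survive")

	// Copy everything except the logging flags into a fresh FlagSet.
	var fs flag.FlagSet
	flag.CommandLine.VisitAll(func(f *flag.Flag) {
		switch f.Name {
		case "log-flush-frequency", "v", "vmodule":
			// Skip: these get registered again later with full control
			// over their defaults.
		default:
			fs.Var(f.Value, f.Name, f.Usage)
		}
	})
	flag.CommandLine = &fs

	// Only "kube-config" survives; "v" is gone until it is re-added.
	flag.CommandLine.VisitAll(func(f *flag.Flag) {
		fmt.Println(f.Name)
	})
}
```

This leaves flag.CommandLine in exactly the state TestMain wants before logsapi.AddGoFlags registers the logging flags itself; registering a duplicate flag name would panic, which is why the comment above notes that it would otherwise fail.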


@@ -19,9 +19,13 @@ package benchmark
import (
"context"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"math"
"os"
"path"
"strings"
"sync"
"testing"
@@ -48,6 +52,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
"k8s.io/kubernetes/test/integration/framework"
testutils "k8s.io/kubernetes/test/utils"
"k8s.io/kubernetes/test/utils/ktesting"
"sigs.k8s.io/yaml"
)
@@ -570,6 +575,39 @@ func (so sleepOp) patchParams(_ *workload) (realOp, error) {
return &so, nil
}
var useTestingLog = flag.Bool("use-testing-log", false, "Write log entries with testing.TB.Log. This is more suitable for unit testing and debugging, but less realistic in real benchmarks.")
func initTestOutput(tb testing.TB) io.Writer {
var output io.Writer
if *useTestingLog {
output = framework.NewTBWriter(tb)
} else {
tmpDir := tb.TempDir()
logfileName := path.Join(tmpDir, "output.log")
fileOutput, err := os.Create(logfileName)
if err != nil {
tb.Fatalf("create log file: %v", err)
}
output = fileOutput
tb.Cleanup(func() {
// Dump the log output when the test is done. The user
// can decide how much of it is visible on success:
// "go test" truncates it, "go test -v" doesn't. All of
// it is shown when the test fails.
if err := fileOutput.Close(); err != nil {
tb.Fatalf("close log file: %v", err)
}
log, err := ioutil.ReadFile(logfileName)
if err != nil {
tb.Fatalf("read log file: %v", err)
}
tb.Logf("full log output:\n%s", string(log))
})
}
return output
}
func BenchmarkPerfScheduling(b *testing.B) {
testCases, err := getTestCases(configFile)
if err != nil {
@@ -579,13 +617,45 @@ func BenchmarkPerfScheduling(b *testing.B) {
b.Fatal(err)
}
output := initTestOutput(b)
// Because we run sequentially, it is possible to change the global
// klog logger and redirect log output. Quite a lot of code still uses
// it instead of supporting contextual logging.
//
// Because we leak one goroutine which calls klog, we cannot restore
// the previous state.
_ = framework.RedirectKlog(b, output)
dataItems := DataItems{Version: "v1"}
for _, tc := range testCases {
b.Run(tc.Name, func(b *testing.B) {
for _, w := range tc.Workloads {
b.Run(w.Name, func(b *testing.B) {
// Ensure that there are no leaked
// goroutines. They could influence
// performance of the next benchmark.
// This must come *after* RedirectKlog
// because then, during cleanup, the
// test will wait for goroutines to
// quit *before* restoring klog settings.
framework.GoleakCheck(b)
ctx := context.Background()
if *useTestingLog {
// In addition to redirecting klog
// output, also enable contextual
// logging.
_, ctx = ktesting.NewTestContext(b)
}
// Now that we are ready to run, start
// etcd.
framework.StartEtcd(b, output)
// 30 minutes should be plenty even for the 5000-node tests.
ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
b.Cleanup(cancel)
for feature, flag := range tc.FeatureGates {