diff --git a/cmd/kube-apiserver/app/testing/testserver.go b/cmd/kube-apiserver/app/testing/testserver.go
index 6969cb70662..3c3b9e55ecd 100644
--- a/cmd/kube-apiserver/app/testing/testserver.go
+++ b/cmd/kube-apiserver/app/testing/testserver.go
@@ -150,7 +150,11 @@ func NewDefaultTestServerOptions() *TestServerInstanceOptions {
 // files that because Golang testing's call to os.Exit will not give a stop channel go routine
 // enough time to remove temporary files.
 func StartTestServer(t ktesting.TB, instanceOptions *TestServerInstanceOptions, customFlags []string, storageConfig *storagebackend.Config) (result TestServer, err error) {
-	tCtx := ktesting.Init(t)
+	// Some callers may have initialized ktesting already.
+	tCtx, ok := t.(ktesting.TContext)
+	if !ok {
+		tCtx = ktesting.Init(t)
+	}
 
 	if instanceOptions == nil {
 		instanceOptions = NewDefaultTestServerOptions()
diff --git a/test/integration/framework/etcd.go b/test/integration/framework/etcd.go
index d816f623dca..9f2f4b943c2 100644
--- a/test/integration/framework/etcd.go
+++ b/test/integration/framework/etcd.go
@@ -26,6 +26,7 @@ import (
 	"os/exec"
 	"strconv"
 	"strings"
+	"sync"
 	"syscall"
 	"testing"
 	"time"
@@ -146,7 +147,10 @@ func RunCustomEtcd(dataDir string, customFlags []string, output io.Writer) (url
 		// try to exit etcd gracefully
 		defer cancel()
 		cmd.Process.Signal(syscall.SIGTERM)
+		var wg sync.WaitGroup
+		wg.Add(1)
 		go func() {
+			defer wg.Done()
 			select {
 			case <-ctx.Done():
 				klog.Infof("etcd exited gracefully, context cancelled")
@@ -156,6 +160,7 @@ func RunCustomEtcd(dataDir string, customFlags []string, output io.Writer) (url
 			}
 		}()
 		err := cmd.Wait()
+		wg.Wait()
 		klog.Infof("etcd exit status: %v", err)
 		err = os.RemoveAll(etcdDataDir)
 		if err != nil {
diff --git a/test/integration/framework/perf_utils.go b/test/integration/framework/perf_utils.go
index bb1d2704b82..566308355fc 100644
--- a/test/integration/framework/perf_utils.go
+++ b/test/integration/framework/perf_utils.go
@@ -18,6 +18,7 @@ package framework
 
 import (
 	"context"
+	"fmt"
 
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -109,20 +110,19 @@ func (p *IntegrationTestNodePreparer) PrepareNodes(ctx context.Context, nextNode
 			}
 		}
 		if err != nil {
-			klog.Fatalf("Error creating node: %v", err)
+			return fmt.Errorf("creating node: %w", err)
 		}
 	}
 
 	nodes, err := waitListAllNodes(p.client)
 	if err != nil {
-		klog.Fatalf("Error listing nodes: %v", err)
+		return fmt.Errorf("listing nodes: %w", err)
 	}
 	index := nextNodeIndex
 	for _, v := range p.countToStrategy {
 		for i := 0; i < v.Count; i, index = i+1, index+1 {
 			if err := testutils.DoPrepareNode(ctx, p.client, &nodes.Items[index], v.Strategy); err != nil {
-				klog.Errorf("Aborting node preparation: %v", err)
-				return err
+				return fmt.Errorf("aborting node preparation: %w", err)
 			}
 		}
 	}
diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md
index 46d7423cad3..682c7afd991 100644
--- a/test/integration/scheduler_perf/README.md
+++ b/test/integration/scheduler_perf/README.md
@@ -102,6 +102,15 @@ performance.
 During interactive debugging sessions it is possible to enable per-test output
 via -use-testing-log.
 
+Log output can be quite large, in particular when running the large benchmarks
+and when not using -use-testing-log. For benchmarks, we want to produce that
+log output in a realistic way (= write to disk using the normal logging
+backends) and only capture the output of a specific test as part of the job
+results when that test failed. Therefore, if the ARTIFACTS env variable is
+set, each test redirects its own output to a `$ARTIFACTS/<test name>.log`
+file and removes that file only if the test passed.
+
+
 ### Integration tests
 
 To run integration tests, use:
diff --git a/test/integration/scheduler_perf/main_test.go b/test/integration/scheduler_perf/main_test.go
index 86a4eaa337d..632693f8765 100644
--- a/test/integration/scheduler_perf/main_test.go
+++ b/test/integration/scheduler_perf/main_test.go
@@ -23,23 +23,15 @@ import (
 	"strings"
 	"testing"
 
-	"k8s.io/apimachinery/pkg/util/runtime"
-	"k8s.io/component-base/featuregate"
 	"k8s.io/component-base/logs"
 	logsapi "k8s.io/component-base/logs/api/v1"
 	_ "k8s.io/component-base/logs/json/register"
 	"k8s.io/kubernetes/test/utils/ktesting"
 )
 
-// Run with -v=2, this is the default log level in production.
-//
-// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
-// with more log output.
-const defaultVerbosity = 2
-
 func TestMain(m *testing.M) {
 	// Run with -v=2, this is the default log level in production.
-	ktesting.SetDefaultVerbosity(defaultVerbosity)
+	ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)
 
 	// test/integration/framework/flags.go unconditionally initializes the
 	// logging flags. That's correct for most tests, but in the
@@ -56,21 +48,17 @@ func TestMain(m *testing.M) {
 	})
 	flag.CommandLine = &fs
 
-	featureGate := featuregate.NewFeatureGate()
-	runtime.Must(logsapi.AddFeatureGates(featureGate))
-	flag.Var(featureGate, "feature-gate",
+	flag.Var(LoggingFeatureGate, "feature-gate",
 		"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
-			"Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
-	c := logsapi.NewLoggingConfiguration()
-	c.Verbosity = defaultVerbosity
+			"Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))
 
 	// This would fail if we hadn't removed the logging flags above.
-	logsapi.AddGoFlags(c, flag.CommandLine)
+	logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)
 
 	flag.Parse()
 
 	logs.InitLogs()
-	if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
+	if err := logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate); err != nil {
 		fmt.Fprintf(os.Stderr, "%v\n", err)
 		os.Exit(1)
 	}
diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go
index af571b21b0f..d08c643b172 100644
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -26,6 +26,7 @@ import (
 	"math"
 	"os"
 	"path"
+	"regexp"
 	"strings"
 	"sync"
 	"testing"
@@ -38,6 +39,7 @@ import (
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	cacheddiscovery "k8s.io/client-go/discovery/cached/memory"
@@ -48,6 +50,7 @@ import (
 	"k8s.io/client-go/restmapper"
 	"k8s.io/component-base/featuregate"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
+	logsapi "k8s.io/component-base/logs/api/v1"
 	"k8s.io/component-base/metrics/legacyregistry"
 	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config"
@@ -94,6 +97,29 @@ const (
 	pluginLabelName = "plugin"
 )
 
+// Run with -v=2; this is the default log level in production.
+//
+// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
+// with more log output.
+const DefaultLoggingVerbosity = 2
+
+var LoggingFeatureGate FeatureGateFlag
+var LoggingConfig *logsapi.LoggingConfiguration
+
+type FeatureGateFlag interface {
+	featuregate.FeatureGate
+	flag.Value
+}
+
+func init() {
+	f := featuregate.NewFeatureGate()
+	runtime.Must(logsapi.AddFeatureGates(f))
+	LoggingFeatureGate = f
+
+	LoggingConfig = logsapi.NewLoggingConfiguration()
+	LoggingConfig.Verbosity = DefaultLoggingVerbosity
+}
+
 var (
 	defaultMetricsCollectorConfig = metricsCollectorConfig{
 		Metrics: map[string][]*labelValues{
@@ -760,8 +786,62 @@ func initTestOutput(tb testing.TB) io.Writer {
 
 var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
 
+var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
+
 func setupTestCase(t testing.TB, tc *testCase, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
 	tCtx := ktesting.Init(t, initoption.PerTestOutput(*useTestingLog))
+	artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
+	if !*useTestingLog && doArtifacts {
+		// Reconfigure logging so that it goes to a separate file per
+		// test instead of stderr. If the test passes, the file gets
+		// deleted. The overall output can be very large (> 200 MB for
+		// ci-benchmark-scheduler-perf-master). With this approach, we
+		// have log output for failures without having to store large
+		// amounts of data that no-one is looking at. The performance
+		// is the same as writing to stderr.
+		if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
+			t.Fatalf("Failed to reset the logging configuration: %v", err)
+		}
+		logfileName := path.Join(artifacts, specialFilenameChars.ReplaceAllString(t.Name(), "_")+".log")
+		out, err := os.Create(logfileName)
+		if err != nil {
+			t.Fatalf("Failed to create per-test log output file: %v", err)
+		}
+		t.Cleanup(func() {
+			// Everything should have stopped by now; this is checked
+			// below by GoleakCheck (which runs first during test
+			// shutdown!). Therefore we can clean up. Errors get logged
+			// and fail the test, but cleanup tries to continue.
+			//
+			// Note that the race detector will flag any goroutine
+			// as causing a race if there is no explicit wait for
+			// that goroutine to stop. We know that they must have
+			// stopped (GoleakCheck!) but the race detector
+			// doesn't.
+			//
+			// This is a major issue because many Kubernetes goroutines get
+			// started without waiting for them to stop :-(
+			if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
+				t.Errorf("Failed to reset the logging configuration: %v", err)
+			}
+			if err := out.Close(); err != nil {
+				t.Errorf("Failed to close the per-test log output file: %s: %v", logfileName, err)
+			}
+			if !t.Failed() {
+				if err := os.Remove(logfileName); err != nil {
+					t.Errorf("Failed to remove the per-test log output file: %v", err)
+				}
+			}
+		})
+		opts := &logsapi.LoggingOptions{
+			ErrorStream: out,
+			InfoStream:  out,
+		}
+		if err := logsapi.ValidateAndApplyWithOptions(LoggingConfig, opts, LoggingFeatureGate); err != nil {
+			t.Fatalf("Failed to apply the per-test logging configuration: %v", err)
+		}
+
+	}
 
 	// Ensure that there are no leaked
 	// goroutines. They could influence
 	// performance of the next benchmark.
diff --git a/test/utils/ktesting/contexthelper.go b/test/utils/ktesting/contexthelper.go
index 28d2b245934..bcfdd4d284e 100644
--- a/test/utils/ktesting/contexthelper.go
+++ b/test/utils/ktesting/contexthelper.go
@@ -59,6 +59,14 @@ func withTimeout(ctx context.Context, tb TB, timeout time.Duration, timeoutCause
 			// No need to set a cause here. The cause or error of
 			// the parent context will be used.
 		case <-after.C:
+			// Code using this tCtx may or may not log the
+			// timeout cause when it runs into the
+			// cancellation. It's better if we do it, just to be on
+			// the safe side.
+			//
+			// It would be nice to log this with the source code location
+			// of our caller, but testing.Logf does not support that.
+			tb.Logf("\nWARNING: %s\n", timeoutCause)
 			cancel(canceledError(timeoutCause))
 		}
 	}()
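
Two of the changes above may be easier to follow outside the diff. First, the per-test log redirection added to `setupTestCase`: the sketch below shows the same pattern in isolation. The helper name `redirectTestOutput` and the `applyOutput`/`resetOutput` callbacks are hypothetical stand-ins for `logsapi.ValidateAndApplyWithOptions` and `logsapi.ResetForTest`; only the ARTIFACTS lookup, the filename sanitization, and the keep-on-failure cleanup mirror the actual change.

```go
package example

import (
	"os"
	"path/filepath"
	"regexp"
	"testing"
)

var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)

// redirectTestOutput writes a test's log output to $ARTIFACTS/<test name>.log
// and removes the file again if the test passes. applyOutput and resetOutput
// are hypothetical hooks standing in for the logsapi calls in the diff above.
func redirectTestOutput(t *testing.T, applyOutput func(*os.File) error, resetOutput func() error) {
	artifacts, ok := os.LookupEnv("ARTIFACTS")
	if !ok {
		return // no artifacts directory configured, keep the default output
	}
	logfileName := filepath.Join(artifacts, specialFilenameChars.ReplaceAllString(t.Name(), "_")+".log")
	out, err := os.Create(logfileName)
	if err != nil {
		t.Fatalf("Failed to create per-test log output file: %v", err)
	}
	t.Cleanup(func() {
		// Restore the default logging configuration before touching
		// the file, then keep the file only when the test failed.
		if err := resetOutput(); err != nil {
			t.Errorf("Failed to reset the logging configuration: %v", err)
		}
		if err := out.Close(); err != nil {
			t.Errorf("Failed to close %s: %v", logfileName, err)
		}
		if !t.Failed() {
			if err := os.Remove(logfileName); err != nil {
				t.Errorf("Failed to remove %s: %v", logfileName, err)
			}
		}
	})
	if err := applyOutput(out); err != nil {
		t.Fatalf("Failed to apply the per-test logging configuration: %v", err)
	}
}
```

Keeping the file only on failure is what lets CI archive useful logs without storing hundreds of megabytes for passing runs.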
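Second, the `sync.WaitGroup` added in `etcd.go` follows a general rule: any goroutine started during shutdown must be waited for explicitly, otherwise the race detector cannot prove that it stopped before cleanup touches shared state. A simplified sketch of that pattern, assuming the process was already started; the function name, the `gracePeriod` parameter, and the `done` channel (used here instead of the context from the diff) are illustrative only.

```go
package example

import (
	"os/exec"
	"sync"
	"syscall"
	"time"
)

// stopWithWatchdog asks a started process to exit gracefully and kills it
// if it does not do so within gracePeriod. Without the wg.Wait, the watchdog
// goroutine may still be running when the caller starts cleaning up (e.g.
// removing a data directory), which the race detector reports as a race.
func stopWithWatchdog(cmd *exec.Cmd, gracePeriod time.Duration) error {
	_ = cmd.Process.Signal(syscall.SIGTERM)

	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		select {
		case <-done:
			// Process exited in time, nothing to do.
		case <-time.After(gracePeriod):
			// Grace period expired, force a shutdown.
			_ = cmd.Process.Kill()
		}
	}()

	err := cmd.Wait()
	close(done) // let the watchdog exit
	wg.Wait()   // explicit synchronization point for the race detector
	return err
}
```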