From 6ee2c6313591b9ce23c1d47a6ddb4a8fe1df0705 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 23 Aug 2024 14:20:17 +0200
Subject: [PATCH 1/5] testserver: allow caller to set up ktesting

In scheduler_perf, the caller configures ktesting without per-test output.
When StartTestServer initialized it again, it did so with per-test output.
In other cases it might have been simply redundant.
---
 cmd/kube-apiserver/app/testing/testserver.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cmd/kube-apiserver/app/testing/testserver.go b/cmd/kube-apiserver/app/testing/testserver.go
index 6969cb70662..3c3b9e55ecd 100644
--- a/cmd/kube-apiserver/app/testing/testserver.go
+++ b/cmd/kube-apiserver/app/testing/testserver.go
@@ -150,7 +150,11 @@ func NewDefaultTestServerOptions() *TestServerInstanceOptions {
 // files that because Golang testing's call to os.Exit will not give a stop channel go routine
 // enough time to remove temporary files.
 func StartTestServer(t ktesting.TB, instanceOptions *TestServerInstanceOptions, customFlags []string, storageConfig *storagebackend.Config) (result TestServer, err error) {
-    tCtx := ktesting.Init(t)
+    // Some callers may have initialized ktesting already.
+    tCtx, ok := t.(ktesting.TContext)
+    if !ok {
+        tCtx = ktesting.Init(t)
+    }
 
     if instanceOptions == nil {
         instanceOptions = NewDefaultTestServerOptions()

From cc9234cffdbcc1de0eb6e1a319b853dc7f759f2b Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 23 Aug 2024 14:23:03 +0200
Subject: [PATCH 2/5] test etcd: avoid potential data race

If the caller needs to reconfigure klog, then calling klog without proper
synchronization while stopping causes a data race. We have to ensure that
the goroutine has terminated before stop returns.
---
 test/integration/framework/etcd.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/test/integration/framework/etcd.go b/test/integration/framework/etcd.go
index d816f623dca..9f2f4b943c2 100644
--- a/test/integration/framework/etcd.go
+++ b/test/integration/framework/etcd.go
@@ -26,6 +26,7 @@ import (
     "os/exec"
     "strconv"
     "strings"
+    "sync"
     "syscall"
     "testing"
     "time"
@@ -146,7 +147,10 @@ func RunCustomEtcd(dataDir string, customFlags []string, output io.Writer) (url
         // try to exit etcd gracefully
         defer cancel()
         cmd.Process.Signal(syscall.SIGTERM)
+        var wg sync.WaitGroup
+        wg.Add(1)
         go func() {
+            defer wg.Done()
             select {
             case <-ctx.Done():
                 klog.Infof("etcd exited gracefully, context cancelled")
@@ -156,6 +160,7 @@
             }
         }()
         err := cmd.Wait()
+        wg.Wait()
         klog.Infof("etcd exit status: %v", err)
         err = os.RemoveAll(etcdDataDir)
         if err != nil {
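The pattern above, shown in isolation: a minimal, self-contained sketch (names are made up; the real change is in RunCustomEtcd) of why the stop path has to wait for the logging goroutine before returning, so that a caller which reconfigures klog afterwards does not race with it.

```go
package main

import (
	"log"
	"sync"
	"time"
)

// runAndStop mirrors the structure of the patched stop function: a helper
// goroutine still logs while shutdown is in progress, so the function must
// not return before that goroutine has finished.
func runAndStop() {
	exited := make(chan struct{})

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		select {
		case <-exited:
			log.Print("process exited gracefully")
		case <-time.After(5 * time.Second):
			log.Print("process did not exit in time, killing it")
		}
	}()

	// ... the real code waits for the child process here ...
	close(exited)

	// Without this Wait the goroutine above may still be calling the
	// logger while the caller already reconfigures the logging backend.
	wg.Wait()
}

func main() {
	runAndStop()
}
```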
From 5447d28c0d47d5236101001a224e940f2fce808c Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 23 Aug 2024 14:24:43 +0200
Subject: [PATCH 3/5] ktesting: log warning on timeout

How exactly a test reacts when its context times out is unclear. In the case
of scheduler_perf, the apiserver started to shut down and the test failure
then was about not being able to reach the apiserver, which was a bit
confusing.

To make it more obvious why the shutdown starts, a WARNING message gets added
to the test output by ktesting before cancellation and thus before any other
output related to that cancellation.
---
 test/utils/ktesting/contexthelper.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/utils/ktesting/contexthelper.go b/test/utils/ktesting/contexthelper.go
index 28d2b245934..bcfdd4d284e 100644
--- a/test/utils/ktesting/contexthelper.go
+++ b/test/utils/ktesting/contexthelper.go
@@ -59,6 +59,14 @@ func withTimeout(ctx context.Context, tb TB, timeout time.Duration, timeoutCause
             // No need to set a cause here. The cause or error of
             // the parent context will be used.
         case <-after.C:
+            // Code using this tCtx may or may not log the
+            // information above when it runs into the
+            // cancellation. It's better if we do it, just to be on
+            // the safe side.
+            //
+            // Would be nice to log this with the source code location
+            // of our caller, but testing.Logf does not support that.
+            tb.Logf("\nWARNING: %s\n", timeoutCause)
             cancel(canceledError(timeoutCause))
         }
     }()
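The same idea as a standalone sketch using only the standard library (withTestTimeout and its logf parameter are invented for this illustration, not the actual ktesting API): emit the warning first, then cancel with the timeout cause, so the warning precedes any failures triggered by the cancellation.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// withTestTimeout cancels the returned context after the given timeout,
// but logs a warning immediately before doing so.
func withTestTimeout(parent context.Context, logf func(format string, args ...any), timeout time.Duration, cause string) context.Context {
	ctx, cancel := context.WithCancelCause(parent)
	timer := time.NewTimer(timeout)
	go func() {
		defer timer.Stop()
		select {
		case <-ctx.Done():
			// Parent was cancelled first, nothing to report.
		case <-timer.C:
			// Log before cancelling so that the warning shows up
			// ahead of any errors caused by the cancellation.
			logf("\nWARNING: %s\n", cause)
			cancel(errors.New(cause))
		}
	}()
	return ctx
}

func main() {
	logf := func(format string, args ...any) { fmt.Printf(format, args...) }
	ctx := withTestTimeout(context.Background(), logf, 10*time.Millisecond, "test timed out after 10ms")
	<-ctx.Done()
	fmt.Println("cause:", context.Cause(ctx))
}
```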
From 6cbdef850ce83a8a2d507ec0d2052f5adbc72ed5 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 23 Aug 2024 14:27:26 +0200
Subject: [PATCH 4/5] scheduler_perf: normal error handling during node creation

Calling klog.Fatalf is not a good way to report problems. It kills the
entire test run, without giving other code a chance to react.
---
 test/integration/framework/perf_utils.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/integration/framework/perf_utils.go b/test/integration/framework/perf_utils.go
index bb1d2704b82..566308355fc 100644
--- a/test/integration/framework/perf_utils.go
+++ b/test/integration/framework/perf_utils.go
@@ -18,6 +18,7 @@ package framework
 
 import (
     "context"
+    "fmt"
 
     v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/resource"
@@ -109,20 +110,19 @@ func (p *IntegrationTestNodePreparer) PrepareNodes(ctx context.Context, nextNode
             }
         }
         if err != nil {
-            klog.Fatalf("Error creating node: %v", err)
+            return fmt.Errorf("creating node: %w", err)
         }
     }
 
     nodes, err := waitListAllNodes(p.client)
     if err != nil {
-        klog.Fatalf("Error listing nodes: %v", err)
+        return fmt.Errorf("listing nodes: %w", err)
     }
     index := nextNodeIndex
     for _, v := range p.countToStrategy {
         for i := 0; i < v.Count; i, index = i+1, index+1 {
             if err := testutils.DoPrepareNode(ctx, p.client, &nodes.Items[index], v.Strategy); err != nil {
-                klog.Errorf("Aborting node preparation: %v", err)
-                return err
+                return fmt.Errorf("aborting node preparation: %w", err)
             }
         }
     }
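The error-handling style this patch switches to, in isolation (createNode and prepareNodes are hypothetical stand-ins for the real PrepareNodes code path): wrap with %w and return, so callers can inspect the error and decide how to react, instead of klog.Fatalf terminating the whole run.

```go
package main

import (
	"errors"
	"fmt"
)

var errQuota = errors.New("node quota exceeded")

func createNode() error { return errQuota }

func prepareNodes() error {
	if err := createNode(); err != nil {
		// Wrapping with %w keeps the original error reachable via
		// errors.Is/errors.As; the caller decides whether to abort.
		return fmt.Errorf("creating node: %w", err)
	}
	return nil
}

func main() {
	if err := prepareNodes(); err != nil {
		fmt.Println(err)                      // creating node: node quota exceeded
		fmt.Println(errors.Is(err, errQuota)) // true
	}
}
```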
From bf1188d292ad65f01ba3e3ded48f45c24640216e Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 23 Aug 2024 14:28:43 +0200
Subject: [PATCH 5/5] scheduler_perf: only store log output after failures

Reconfiguring the logging infrastructure with a per-test output file mimics
the behavior of per-test output (log output captured only on failures) while
still using the normal logging code, which is important for benchmarking.

To enable this behavior, the ARTIFACTS env variable must be set.
---
 test/integration/scheduler_perf/README.md    |  9 +++
 test/integration/scheduler_perf/main_test.go | 22 ++---
 .../scheduler_perf/scheduler_perf.go         | 80 +++++++++++++++++++
 3 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md
index 287ed486244..376d471cc2d 100644
--- a/test/integration/scheduler_perf/README.md
+++ b/test/integration/scheduler_perf/README.md
@@ -102,6 +102,15 @@ performance. During interactive debugging sessions it is possible to enable
 per-test output via -use-testing-log.
 
+Log output can be quite large, in particular when running the large benchmarks
+and when not using -use-testing-log. For benchmarks, we want to produce that
+log output in a realistic way (= write to disk using the normal logging
+backends) and only capture the output of a specific test as part of the job
+results when that test failed. Therefore, if the ARTIFACTS env variable is
+set, each test redirects its own output to a `$ARTIFACTS/<test name>.log` file
+and removes that file only if the test passed.
+
+
 ### Integration tests
 
 To run integration tests, use:
 
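The scheduler_perf.go hunk further below implements this redirection; the following stripped-down sketch (helper name and structure are illustrative only, not the actual code) shows the core of the flow the README describes: create `$ARTIFACTS/<test name>.log` and keep it only when the test fails.

```go
package perflogging

import (
	"os"
	"path"
	"regexp"
	"testing"
)

var unsafeFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)

// perTestLogFile returns a file under $ARTIFACTS named after the test,
// or nil if ARTIFACTS is not set. The file is removed again when the
// test passes, so only failures leave log output behind.
func perTestLogFile(tb testing.TB) *os.File {
	artifacts, ok := os.LookupEnv("ARTIFACTS")
	if !ok {
		return nil // no redirection requested
	}
	name := path.Join(artifacts, unsafeFilenameChars.ReplaceAllString(tb.Name(), "_")+".log")
	out, err := os.Create(name)
	if err != nil {
		tb.Fatalf("create per-test log file: %v", err)
	}
	tb.Cleanup(func() {
		if err := out.Close(); err != nil {
			tb.Errorf("close per-test log file: %v", err)
		}
		if !tb.Failed() {
			// The test passed, so nobody will look at the output.
			if err := os.Remove(name); err != nil {
				tb.Errorf("remove per-test log file: %v", err)
			}
		}
	})
	return out
}
```

The real implementation additionally resets and re-applies the logging configuration through logsapi so that klog output actually flows into that file, as the diff below shows.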
diff --git a/test/integration/scheduler_perf/main_test.go b/test/integration/scheduler_perf/main_test.go
index 86a4eaa337d..632693f8765 100644
--- a/test/integration/scheduler_perf/main_test.go
+++ b/test/integration/scheduler_perf/main_test.go
@@ -23,23 +23,15 @@ import (
     "strings"
     "testing"
 
-    "k8s.io/apimachinery/pkg/util/runtime"
-    "k8s.io/component-base/featuregate"
     "k8s.io/component-base/logs"
     logsapi "k8s.io/component-base/logs/api/v1"
     _ "k8s.io/component-base/logs/json/register"
     "k8s.io/kubernetes/test/utils/ktesting"
 )
 
-// Run with -v=2, this is the default log level in production.
-//
-// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
-// with more log output.
-const defaultVerbosity = 2
-
 func TestMain(m *testing.M) {
     // Run with -v=2, this is the default log level in production.
-    ktesting.SetDefaultVerbosity(defaultVerbosity)
+    ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)
 
     // test/integration/framework/flags.go unconditionally initializes the
     // logging flags. That's correct for most tests, but in the
@@ -56,21 +48,17 @@ func TestMain(m *testing.M) {
     })
     flag.CommandLine = &fs
 
-    featureGate := featuregate.NewFeatureGate()
-    runtime.Must(logsapi.AddFeatureGates(featureGate))
-    flag.Var(featureGate, "feature-gate",
+    flag.Var(LoggingFeatureGate, "feature-gate",
         "A set of key=value pairs that describe feature gates for alpha/experimental features. "+
-            "Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
-    c := logsapi.NewLoggingConfiguration()
-    c.Verbosity = defaultVerbosity
+            "Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))
 
     // This would fail if we hadn't removed the logging flags above.
-    logsapi.AddGoFlags(c, flag.CommandLine)
+    logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)
 
     flag.Parse()
 
     logs.InitLogs()
-    if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
+    if err := logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate); err != nil {
         fmt.Fprintf(os.Stderr, "%v\n", err)
         os.Exit(1)
     }

diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go
index 5b34ec6ce7f..bfa05a10e85 100644
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -26,6 +26,7 @@ import (
     "math"
     "os"
     "path"
+    "regexp"
     "strings"
     "sync"
     "testing"
@@ -38,6 +39,7 @@ import (
     "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
     "k8s.io/apimachinery/pkg/labels"
     "k8s.io/apimachinery/pkg/runtime/schema"
+    "k8s.io/apimachinery/pkg/util/runtime"
     "k8s.io/apimachinery/pkg/util/wait"
     utilfeature "k8s.io/apiserver/pkg/util/feature"
     cacheddiscovery "k8s.io/client-go/discovery/cached/memory"
@@ -48,6 +50,7 @@ import (
     "k8s.io/client-go/restmapper"
     "k8s.io/component-base/featuregate"
     featuregatetesting "k8s.io/component-base/featuregate/testing"
+    logsapi "k8s.io/component-base/logs/api/v1"
     "k8s.io/component-base/metrics/legacyregistry"
     "k8s.io/klog/v2"
     "k8s.io/kubernetes/pkg/scheduler/apis/config"
@@ -94,6 +97,29 @@ const (
     pluginLabelName = "plugin"
 )
 
+// Run with -v=2, this is the default log level in production.
+//
+// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
+// with more log output.
+const DefaultLoggingVerbosity = 2
+
+var LoggingFeatureGate FeatureGateFlag
+var LoggingConfig *logsapi.LoggingConfiguration
+
+type FeatureGateFlag interface {
+    featuregate.FeatureGate
+    flag.Value
+}
+
+func init() {
+    f := featuregate.NewFeatureGate()
+    runtime.Must(logsapi.AddFeatureGates(f))
+    LoggingFeatureGate = f
+
+    LoggingConfig = logsapi.NewLoggingConfiguration()
+    LoggingConfig.Verbosity = DefaultLoggingVerbosity
+}
+
 var (
     defaultMetricsCollectorConfig = metricsCollectorConfig{
         Metrics: map[string][]*labelValues{
@@ -764,8 +790,62 @@ func initTestOutput(tb testing.TB) io.Writer {
 
 var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
 
+var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
+
 func setupTestCase(t testing.TB, tc *testCase, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
     tCtx := ktesting.Init(t, initoption.PerTestOutput(*useTestingLog))
+    artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
+    if !*useTestingLog && doArtifacts {
+        // Reconfigure logging so that it goes to a separate file per
+        // test instead of stderr. If the test passes, the file gets
+        // deleted. The overall output can be very large (> 200 MB for
+        // ci-benchmark-scheduler-perf-master). With this approach, we
+        // have log output for failures without having to store large
+        // amounts of data that no-one is looking at. The performance
+        // is the same as writing to stderr.
+        if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
+            t.Fatalf("Failed to reset the logging configuration: %v", err)
+        }
+        logfileName := path.Join(artifacts, specialFilenameChars.ReplaceAllString(t.Name(), "_")+".log")
+        out, err := os.Create(logfileName)
+        if err != nil {
+            t.Fatalf("Failed to create per-test log output file: %v", err)
+        }
+        t.Cleanup(func() {
+            // Everything should have stopped by now, checked below
+            // by GoleakCheck (which runs first during test
+            // shutdown!). Therefore we can clean up. Errors get logged
+            // and fail the test, but cleanup tries to continue.
+            //
+            // Note that the race detector will flag any goroutine
+            // as causing a race if there is no explicit wait for
+            // that goroutine to stop. We know that they must have
+            // stopped (GoLeakCheck!) but the race detector
+            // doesn't.
+            //
+            // This is a major issue because many Kubernetes goroutines get
+            // started without waiting for them to stop :-(
+            if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
+                t.Errorf("Failed to reset the logging configuration: %v", err)
+            }
+            if err := out.Close(); err != nil {
+                t.Errorf("Failed to close the per-test log output file: %s: %v", logfileName, err)
+            }
+            if !t.Failed() {
+                if err := os.Remove(logfileName); err != nil {
+                    t.Errorf("Failed to remove the per-test log output file: %v", err)
+                }
+            }
+        })
+        opts := &logsapi.LoggingOptions{
+            ErrorStream: out,
+            InfoStream:  out,
+        }
+        if err := logsapi.ValidateAndApplyWithOptions(LoggingConfig, opts, LoggingFeatureGate); err != nil {
+            t.Fatalf("Failed to apply the per-test logging configuration: %v", err)
+        }
+
+    }
 
     // Ensure that there are no leaked
     // goroutines. They could influence