From 369a18a3a1a248948fad220600bb1fe62afeb47d Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 18 Nov 2024 12:32:59 +0100
Subject: [PATCH 1/3] scheduler_perf: simplify flags, fix output

The "disabled by label filter" message for benchmarks printed the pointer
to the filter string, not the filter string itself. Not using pointers
avoids this mistake and makes the code simpler.
---
 .../scheduler_perf/scheduler_perf.go | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go
index f8aed3a8fe4..0aabc57f5cd 100644
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -207,9 +207,9 @@ var (
     }
 )
 
-var UseTestingLog *bool
-var PerfSchedulingLabelFilter *string
-var TestSchedulingLabelFilter *string
+var UseTestingLog bool
+var PerfSchedulingLabelFilter string
+var TestSchedulingLabelFilter string
 
 // InitTests should be called in a TestMain in each config subdirectory.
 func InitTests() error {
@@ -235,9 +235,9 @@ func InitTests() error {
         "A set of key=value pairs that describe feature gates for alpha/experimental features. "+
             "Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))
 
-    UseTestingLog = flag.Bool("use-testing-log", false, "Write log entries with testing.TB.Log. This is more suitable for unit testing and debugging, but less realistic in real benchmarks.")
-    PerfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
-    TestSchedulingLabelFilter = flag.String("test-scheduling-label-filter", "integration-test,-performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by TestScheduling")
+    flag.BoolVar(&UseTestingLog, "use-testing-log", false, "Write log entries with testing.TB.Log. This is more suitable for unit testing and debugging, but less realistic in real benchmarks.")
+    flag.StringVar(&PerfSchedulingLabelFilter, "perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
+    flag.StringVar(&TestSchedulingLabelFilter, "test-scheduling-label-filter", "integration-test,-performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by TestScheduling")
 
     // This would fail if we hadn't removed the logging flags above.
     logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)
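To illustrate the mistake described in the commit message above, here is a minimal standalone sketch (illustrative only, not code from the patch): with the old *string flag variables, passing the pointer itself to a formatting verb prints an address, while dereferencing it, or using a plain string as this patch does, prints the actual filter value.

    package main

    import (
        "flag"
        "fmt"
    )

    func main() {
        // flag.String returns a *string, like the old label filter flags.
        filter := flag.String("perf-scheduling-label-filter", "performance", "example label filter")
        flag.Parse()

        // Passing the pointer prints something like 0xc000014070,
        // which is what the old Skipf call effectively did.
        fmt.Printf("disabled by label filter %v\n", filter)

        // Dereferencing the pointer (or registering a plain string variable
        // with flag.StringVar, as the patch does) prints the value itself.
        fmt.Printf("disabled by label filter %q\n", *filter)
    }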
@@ -988,7 +988,7 @@ func (scm stopCollectingMetricsOp) patchParams(_ *workload) (realOp, error) {
 
 func initTestOutput(tb testing.TB) io.Writer {
     var output io.Writer
-    if *UseTestingLog {
+    if UseTestingLog {
         output = framework.NewTBWriter(tb)
     } else {
         tmpDir := tb.TempDir()
@@ -1020,9 +1020,9 @@
 var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
 
 func setupTestCase(t testing.TB, tc *testCase, featureGates map[featuregate.Feature]bool, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
-    tCtx := ktesting.Init(t, initoption.PerTestOutput(*UseTestingLog))
+    tCtx := ktesting.Init(t, initoption.PerTestOutput(UseTestingLog))
     artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
-    if !*UseTestingLog && doArtifacts {
+    if !UseTestingLog && doArtifacts {
         // Reconfigure logging so that it goes to a separate file per
         // test instead of stderr. If the test passes, the file gets
         // deleted. The overall output can be very large (> 200 MB for
@@ -1130,9 +1130,9 @@ func RunBenchmarkPerfScheduling(b *testing.B, configFile string, topicName strin
     }
 
     if testing.Short() {
-        *PerfSchedulingLabelFilter += ",+short"
+        PerfSchedulingLabelFilter += ",+short"
     }
-    testcaseLabelSelectors := strings.Split(*PerfSchedulingLabelFilter, ",")
+    testcaseLabelSelectors := strings.Split(PerfSchedulingLabelFilter, ",")
 
     output := initTestOutput(b)
 
@@ -1150,7 +1150,7 @@ func RunBenchmarkPerfScheduling(b *testing.B, configFile string, topicName strin
         for _, w := range tc.Workloads {
             b.Run(w.Name, func(b *testing.B) {
                 if !enabled(testcaseLabelSelectors, append(tc.Labels, w.Labels...)...) {
-                    b.Skipf("disabled by label filter %v", PerfSchedulingLabelFilter)
+                    b.Skipf("disabled by label filter %q", PerfSchedulingLabelFilter)
                 }
 
                 featureGates := featureGatesMerge(tc.FeatureGates, w.FeatureGates)
@@ -1244,16 +1244,16 @@ func RunIntegrationPerfScheduling(t *testing.T, configFile string) {
     }
 
     if testing.Short() {
-        *TestSchedulingLabelFilter += ",+short"
+        TestSchedulingLabelFilter += ",+short"
     }
-    testcaseLabelSelectors := strings.Split(*TestSchedulingLabelFilter, ",")
+    testcaseLabelSelectors := strings.Split(TestSchedulingLabelFilter, ",")
 
     for _, tc := range testCases {
         t.Run(tc.Name, func(t *testing.T) {
             for _, w := range tc.Workloads {
                 t.Run(w.Name, func(t *testing.T) {
                     if !enabled(testcaseLabelSelectors, append(tc.Labels, w.Labels...)...) {
-                        t.Skipf("disabled by label filter %q", *TestSchedulingLabelFilter)
+                        t.Skipf("disabled by label filter %q", TestSchedulingLabelFilter)
                     }
                     featureGates := featureGatesMerge(tc.FeatureGates, w.FeatureGates)
                     informerFactory, tCtx := setupTestCase(t, tc, featureGates, nil, nil)

From ac3d43a8a65d5b946109167a1f4063d08c6759d5 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 18 Nov 2024 12:35:05 +0100
Subject: [PATCH 2/3] scheduler_perf: work around incorrect gotestsum failure reports

Because Go does not emit a "pass" action for benchmarks
(https://github.com/golang/go/issues/66825#issuecomment-2343229005),
gotestsum reports a successful benchmark run as failed
(https://github.com/gotestyourself/gotestsum/issues/413#issuecomment-2343206787).

We can work around that in each benchmark and sub-benchmark by emitting,
on stdout, the output line that `go test` expects from the test binary on
success.
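To make the scope of the workaround concrete, here is a hypothetical benchmark with the same nesting as BenchmarkPerfScheduling (a sketch for illustration, not code from this patch): the parent benchmark only spawns sub-benchmarks via b.Run, and in JSON mode no level gets a "pass" action of its own, so each level needs the extra "PASS" line.

    package example

    import "testing"

    // BenchmarkExample mirrors the nesting used by the scheduler_perf
    // benchmarks: a top-level benchmark that only runs sub-benchmarks.
    // Even when everything succeeds, none of these levels produce a "pass"
    // action in the JSON test output, which is what confuses gotestsum.
    func BenchmarkExample(b *testing.B) {
        for _, name := range []string{"small", "large"} {
            b.Run(name, func(b *testing.B) {
                for i := 0; i < b.N; i++ {
                    _ = make([]byte, 1024)
                }
            })
        }
    }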
---
 .../scheduler_perf/scheduler_perf.go | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go
index 0aabc57f5cd..a69100bff9e 100644
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -29,6 +29,7 @@ import (
     "os"
     "path"
     "regexp"
+    "slices"
     "strings"
     "sync"
     "testing"
@@ -1115,6 +1116,32 @@ func featureGatesMerge(src map[featuregate.Feature]bool, overrides map[featurega
     return result
 }
 
+// fixJSONOutput works around Go not emitting a "pass" action for
+// sub-benchmarks
+// (https://github.com/golang/go/issues/66825#issuecomment-2343229005), which
+// causes gotestsum to report a successful benchmark run as failed
+// (https://github.com/gotestyourself/gotestsum/issues/413#issuecomment-2343206787).
+//
+// It does this by printing the missing "PASS" output line that test2json
+// then converts into the "pass" action.
+func fixJSONOutput(b *testing.B) {
+    if !slices.Contains(os.Args, "-test.v=test2json") {
+        // Not printing JSON.
+        return
+    }
+
+    start := time.Now()
+    b.Cleanup(func() {
+        if b.Failed() {
+            // Really has failed, do nothing.
+            return
+        }
+        // SYN gets injected when using -test.v=test2json, see
+        // https://cs.opensource.google/go/go/+/refs/tags/go1.23.3:src/testing/testing.go;drc=87ec2c959c73e62bfae230ef7efca11ec2a90804;l=527
+        fmt.Fprintf(os.Stderr, "%c--- PASS: %s (%.2fs)\n", 22 /* SYN */, b.Name(), time.Since(start).Seconds())
+    })
+}
+
 // RunBenchmarkPerfScheduling runs the scheduler performance benchmark tests.
 //
 // You can pass your own scheduler plugins via outOfTreePluginRegistry.
@@ -1128,6 +1155,7 @@ func RunBenchmarkPerfScheduling(b *testing.B, configFile string, topicName strin
     if err = validateTestCases(testCases); err != nil {
         b.Fatal(err)
     }
+    fixJSONOutput(b)
 
     if testing.Short() {
         PerfSchedulingLabelFilter += ",+short"
@@ -1147,11 +1175,13 @@ func RunBenchmarkPerfScheduling(b *testing.B, configFile string, topicName strin
     dataItems := DataItems{Version: "v1"}
     for _, tc := range testCases {
         b.Run(tc.Name, func(b *testing.B) {
+            fixJSONOutput(b)
             for _, w := range tc.Workloads {
                 b.Run(w.Name, func(b *testing.B) {
                     if !enabled(testcaseLabelSelectors, append(tc.Labels, w.Labels...)...) {
                         b.Skipf("disabled by label filter %q", PerfSchedulingLabelFilter)
                     }
+                    fixJSONOutput(b)
 
                     featureGates := featureGatesMerge(tc.FeatureGates, w.FeatureGates)
                     informerFactory, tCtx := setupTestCase(b, tc, featureGates, output, outOfTreePluginRegistry)

From 25a4758bcc864766ed8bdf09d79d1869f98c1b63 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 18 Nov 2024 12:44:34 +0100
Subject: [PATCH 3/3] testing: allow keeping detailed go test JUnit results

Pruning of tests to the top-level test was added for jobs like
pull-kubernetes-unit which run many tests. For other, more focused jobs
like scheduler-perf benchmarking it would be nice to keep the more
detailed information, in particular because it includes the duration per
test case.
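A usage note (my reading of the change, not stated in the patch): because the script below only applies a default via ${KUBE_PRUNE_JUNIT_TESTS:-true}, a focused job can keep per-testcase JUnit entries by exporting KUBE_PRUNE_JUNIT_TESTS=false in its environment before the test runner is invoked, for example around a `make test` call, while existing jobs keep the default of true and continue to get the pruned report.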
---
 hack/make-rules/test.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hack/make-rules/test.sh b/hack/make-rules/test.sh
index 75fa127c7e8..afddf8df982 100755
--- a/hack/make-rules/test.sh
+++ b/hack/make-rules/test.sh
@@ -81,6 +81,8 @@ fi
 # Set to 'y' to keep the verbose stdout from tests when KUBE_JUNIT_REPORT_DIR is
 # set.
 KUBE_KEEP_VERBOSE_TEST_OUTPUT=${KUBE_KEEP_VERBOSE_TEST_OUTPUT:-n}
+# Set to 'false' to disable reduction of the JUnit file to only the top level tests.
+KUBE_PRUNE_JUNIT_TESTS=${KUBE_PRUNE_JUNIT_TESTS:-true}
 
 kube::test::usage() {
   kube::log::usage_from_stdin <