From 1f341ee7b5d38a36b91f225a740a77529d373d26 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 12:00:20 +0100 Subject: [PATCH 1/6] kube-scheduler: downgrade namespace log message from "error" to "info" GetNamespaceLabelsSnapshot has a fallback when it gets errors when looking up a namespace, therefore reporting the error is more informational than a real error. In particular, not finding the namespace is normal when running test/integration/scheduler_perf and happens so frequently that there is a lot of output on stderr: E0120 12:19:09.204768 95305 plugin.go:138] "getting namespace, assuming empty set of namespace labels" err="namespace \"namespace-1\" not found" namespace="namespace-1" --- pkg/scheduler/framework/plugins/interpodaffinity/plugin.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go index da6b35b5e68..d9d3e5a94da 100644 --- a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go +++ b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go @@ -135,6 +135,6 @@ func GetNamespaceLabelsSnapshot(ns string, nsLister listersv1.NamespaceLister) ( // Create and return snapshot of the labels. return labels.Merge(podNS.Labels, nil) } - klog.ErrorS(err, "getting namespace, assuming empty set of namespace labels", "namespace", ns) + klog.V(3).InfoS("getting namespace, assuming empty set of namespace labels", "namespace", ns, "err", err) return } From 259a8ad0b7fc75c475f9d917ad6e1ac583e7119a Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 12:06:22 +0100 Subject: [PATCH 2/6] test: allow controlling etcd log level When running an integration test that measures performance, like for example test/integration/scheduler_perf, running etcd with debug level output is undesirable because it creates additional load on the system and isn't realistic. 
The default is still "debug", but ETCD_LOGLEVEL=warn can be used to override that. --- hack/lib/etcd.sh | 8 ++++++-- test/integration/scheduler_perf/README.md | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hack/lib/etcd.sh b/hack/lib/etcd.sh index 756e342499c..185420d5427 100755 --- a/hack/lib/etcd.sh +++ b/hack/lib/etcd.sh @@ -19,6 +19,10 @@ ETCD_VERSION=${ETCD_VERSION:-3.5.1} ETCD_HOST=${ETCD_HOST:-127.0.0.1} ETCD_PORT=${ETCD_PORT:-2379} +# This is intentionally not called ETCD_LOG_LEVEL: +# etcd checks that and complains when it is set in addition +# to the command line argument, even when both have the same value. +ETCD_LOGLEVEL=${ETCD_LOGLEVEL:-debug} export KUBE_INTEGRATION_ETCD_URL="http://${ETCD_HOST}:${ETCD_PORT}" kube::etcd::validate() { @@ -82,8 +86,8 @@ kube::etcd::start() { else ETCD_LOGFILE=${ETCD_LOGFILE:-"/dev/null"} fi - kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=debug > \"${ETCD_LOGFILE}\" 2>/dev/null" - etcd --advertise-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --data-dir "${ETCD_DIR}" --listen-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --log-level=debug 2> "${ETCD_LOGFILE}" >/dev/null & + kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=${ETCD_LOGLEVEL} > \"${ETCD_LOGFILE}\" 2>/dev/null" + etcd --advertise-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --data-dir "${ETCD_DIR}" --listen-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --log-level="${ETCD_LOGLEVEL}" 2> "${ETCD_LOGFILE}" >/dev/null & ETCD_PID=$! echo "Waiting for etcd to come up." 
diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md index 536ce951354..5a6ea8d10ef 100644 --- a/test/integration/scheduler_perf/README.md +++ b/test/integration/scheduler_perf/README.md @@ -36,14 +36,14 @@ How To Run ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=true -logtostderr=true -run=." KUBE_TIMEOUT="--timeout=60m" SHORT="--short=false" +make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=true -logtostderr=true -run=." KUBE_TIMEOUT="--timeout=60m" SHORT="--short=false" ``` ## Benchmark tests ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling" +make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling" ``` The benchmark suite runs all the tests specified under config/performance-config.yaml. 
@@ -58,14 +58,14 @@ Otherwise, the golang benchmark framework will try to run a test more than once ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingBasic/5000Nodes/5000InitPods/1000PodsToSchedule" +make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingBasic/5000Nodes/5000InitPods/1000PodsToSchedule" ``` To produce a cpu profile: ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TIMEOUT="-timeout=3600s" KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -cpuprofile ~/cpu-profile.out" +make test-integration WHAT=./test/integration/scheduler_perf KUBE_TIMEOUT="-timeout=3600s" ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -cpuprofile ~/cpu-profile.out" ``` ### How to configure benchmark tests From 535d885c291a8e6e9c245407128c1765c36bfaef Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 13:38:07 +0100 Subject: [PATCH 3/6] test: fix info message for etcd startup The output redirection in the info message did not match the actual invocation (stdout and stderr swapped). 
--- hack/lib/etcd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/lib/etcd.sh b/hack/lib/etcd.sh index 185420d5427..4df9c3fc442 100755 --- a/hack/lib/etcd.sh +++ b/hack/lib/etcd.sh @@ -86,7 +86,7 @@ kube::etcd::start() { else ETCD_LOGFILE=${ETCD_LOGFILE:-"/dev/null"} fi - kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=${ETCD_LOGLEVEL} > \"${ETCD_LOGFILE}\" 2>/dev/null" + kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=${ETCD_LOGLEVEL} 2> \"${ETCD_LOGFILE}\" >/dev/null" etcd --advertise-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --data-dir "${ETCD_DIR}" --listen-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --log-level="${ETCD_LOGLEVEL}" 2> "${ETCD_LOGFILE}" >/dev/null & ETCD_PID=$! From 8d44b819b3fa0b0d82731078769edf1824830619 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 20:04:07 +0100 Subject: [PATCH 4/6] scheduler_perf: avoid ambiguous test names "-bench=PerfScheduling/Preemption/500Nodes" ran both the PerfScheduling/Preemption/500Nodes and the PerfScheduling/PreemptionPVs/500Nodes benchmark. This can be avoided by choosing names where none is the prefix of another. 
--- test/integration/scheduler_perf/config/performance-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml index 20d79dcaeb4..ff75a1b3107 100644 --- a/test/integration/scheduler_perf/config/performance-config.yaml +++ b/test/integration/scheduler_perf/config/performance-config.yaml @@ -373,7 +373,7 @@ initPods: 2000 measurePods: 1000 -- name: Preemption +- name: PreemptionBasic workloadTemplate: - opcode: createNodes countParam: $initNodes From c62d7407c8a764da55c2248dfcd250a9c32e3e24 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 20:08:02 +0100 Subject: [PATCH 5/6] scheduler_perf: dump test data when writing it failed Occasionally, writing as JSON failed because a NaN float couldn't be encoded. The extended log message helps understand where that comes from, for example: F0120 20:24:45.515745 511835 scheduler_perf_test.go:540] BenchmarkPerfScheduling: unable to write measured data {Version:v1 DataItems:[{Data:map[Average:35.714285714285715 Perc50:2 Perc90:36 Perc95:412 Perc99:412] Unit:pods/s Labels:map[Metric:SchedulingThroughput Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2]} {Data:map[Average:27.863967530999993 Perc50:13.925925925925926 Perc90:30.06711409395973 Perc95:31.85682326621924 Perc99:704] Unit:ms Labels:map[Metric:scheduler_e2e_scheduling_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2]} {Data:map[Average:11915.651577744 Perc50:15168.796680497926 Perc90:19417.759336099585 Perc95:19948.87966804979 Perc99:20373.77593360996] Unit:ms Labels:map[Metric:scheduler_pod_scheduling_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2]} {Data:map[Average:1.1865832049999983 Perc50:0.7636363636363637 Perc90:2.891903719912473 Perc95:3.066958424507659 Perc99:5.333333333333334] Unit:ms 
Labels:map[Metric:scheduler_framework_extension_point_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2 extension_point:Filter]} {Data:map[Average:NaN Perc50:NaN Perc90:NaN Perc95:NaN Perc99:NaN] Unit:ms Labels:map[Metric:scheduler_framework_extension_point_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2 extension_point:Score]}]}: json: unsupported value: NaN --- test/integration/scheduler_perf/scheduler_perf_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/scheduler_perf/scheduler_perf_test.go b/test/integration/scheduler_perf/scheduler_perf_test.go index a594502dd0d..66bcba96837 100644 --- a/test/integration/scheduler_perf/scheduler_perf_test.go +++ b/test/integration/scheduler_perf/scheduler_perf_test.go @@ -537,7 +537,7 @@ func BenchmarkPerfScheduling(b *testing.B) { }) } if err := dataItems2JSONFile(dataItems, b.Name()); err != nil { - klog.Fatalf("%v: unable to write measured data: %v", b.Name(), err) + klog.Fatalf("%v: unable to write measured data %+v: %v", b.Name(), dataItems, err) } } From e1e84c8e5f56d0f6e709e418839662200243ec90 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 7 Feb 2022 09:12:10 +0100 Subject: [PATCH 6/6] scheduler_perf: run with -v=0 by default This provides a mechanism for overriding the forced increase of the klog verbosity to 4 when starting the apiserver and uses that for the scheduler_perf benchmark. Other tests run as before. A global variable was used because adding an explicit parameter to several helper functions would have caused a lot of code churn (test -> integration/util.StartApiserver -> integration/framework.RunAnAPIServerUsingServer -> integration/framework.startAPIServerOrDie). 
--- test/integration/framework/controlplane_utils.go | 13 ++++++++++--- test/integration/scheduler_perf/util.go | 3 +++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/test/integration/framework/controlplane_utils.go b/test/integration/framework/controlplane_utils.go index 8b1421f8f12..a476611459a 100644 --- a/test/integration/framework/controlplane_utils.go +++ b/test/integration/framework/controlplane_utils.go @@ -65,6 +65,13 @@ const ( UnprivilegedUserToken = "unprivileged-user" ) +// MinVerbosity determines the minimum klog verbosity when running tests that +// involve the apiserver. This overrides the -v value from the command line, +// i.e. -v=0 has no effect when MinVerbosity is 4 (the default). Tests can opt +// out of this by setting MinVerbosity to zero before starting the control +// plane or choose some different minimum verbosity. +var MinVerbosity = 4 + // Config is a struct of configuration directives for NewControlPlaneComponents. type Config struct { // If nil, a default is used, partially filled configs will not get populated. 
@@ -139,11 +146,11 @@ func startAPIServerOrDie(controlPlaneConfig *controlplane.Config, incomingServer var m *controlplane.Instance var s *httptest.Server - // Ensure we log at least level 4 + // Ensure we log at least at the desired level v := flag.Lookup("v").Value level, _ := strconv.Atoi(v.String()) - if level < 4 { - v.Set("4") + if level < MinVerbosity { + v.Set(strconv.Itoa(MinVerbosity)) } if incomingServer != nil { diff --git a/test/integration/scheduler_perf/util.go b/test/integration/scheduler_perf/util.go index 18f2f7cb8eb..e2168986f57 100644 --- a/test/integration/scheduler_perf/util.go +++ b/test/integration/scheduler_perf/util.go @@ -44,6 +44,7 @@ import ( "k8s.io/kube-scheduler/config/v1beta2" "k8s.io/kubernetes/pkg/scheduler/apis/config" kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme" + "k8s.io/kubernetes/test/integration/framework" "k8s.io/kubernetes/test/integration/util" testutils "k8s.io/kubernetes/test/utils" ) @@ -75,6 +76,8 @@ func newDefaultComponentConfig() (*config.KubeSchedulerConfiguration, error) { // Notes on rate limiter: // - client rate limit is set to 5000. func mustSetupScheduler(config *config.KubeSchedulerConfiguration) (util.ShutdownFunc, coreinformers.PodInformer, clientset.Interface, dynamic.Interface) { + // Run API server with minimal logging by default. Can be raised with -v. framework.MinVerbosity = 0 apiURL, apiShutdown := util.StartApiserver() var err error