From 1f341ee7b5d38a36b91f225a740a77529d373d26 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 12:00:20 +0100 Subject: [PATCH 1/6] kube-scheduler: downgrade namespace log message from "error" to "info" GetNamespaceLabelsSnapshot has a fallback when it gets errors when looking up a namespace, therefore reporting the error is more informational than a real error. In particular, not finding the namespace is normal when running test/integration/scheduler_perf and happens so frequently that there is a lot of output on stderr: E0120 12:19:09.204768 95305 plugin.go:138] "getting namespace, assuming empty set of namespace labels" err="namespace \"namespace-1\" not found" namespace="namespace-1" --- pkg/scheduler/framework/plugins/interpodaffinity/plugin.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go index da6b35b5e68..d9d3e5a94da 100644 --- a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go +++ b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go @@ -135,6 +135,6 @@ func GetNamespaceLabelsSnapshot(ns string, nsLister listersv1.NamespaceLister) ( // Create and return snapshot of the labels. return labels.Merge(podNS.Labels, nil) } - klog.ErrorS(err, "getting namespace, assuming empty set of namespace labels", "namespace", ns) + klog.V(3).InfoS("getting namespace, assuming empty set of namespace labels", "namespace", ns, "err", err) return } From 259a8ad0b7fc75c475f9d917ad6e1ac583e7119a Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 12:06:22 +0100 Subject: [PATCH 2/6] test: allow controlling etcd log level When running an integration test that measures performance, like for example test/integration/scheduler_perf, running etcd with debug level output is undesirable because it creates additional load on the system and isn't realistic. 
The default is still "debug", but ETCD_LOGLEVEL=warn can be used to override that. --- hack/lib/etcd.sh | 8 ++++++-- test/integration/scheduler_perf/README.md | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hack/lib/etcd.sh b/hack/lib/etcd.sh index 756e342499c..185420d5427 100755 --- a/hack/lib/etcd.sh +++ b/hack/lib/etcd.sh @@ -19,6 +19,10 @@ ETCD_VERSION=${ETCD_VERSION:-3.5.1} ETCD_HOST=${ETCD_HOST:-127.0.0.1} ETCD_PORT=${ETCD_PORT:-2379} +# This is intentionally not called ETCD_LOG_LEVEL: +# etcd checks that and complains when it is set in addition +# to the command line argument, even when both have the same value. +ETCD_LOGLEVEL=${ETCD_LOGLEVEL:-debug} export KUBE_INTEGRATION_ETCD_URL="http://${ETCD_HOST}:${ETCD_PORT}" kube::etcd::validate() { @@ -82,8 +86,8 @@ kube::etcd::start() { else ETCD_LOGFILE=${ETCD_LOGFILE:-"/dev/null"} fi - kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=debug > \"${ETCD_LOGFILE}\" 2>/dev/null" - etcd --advertise-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --data-dir "${ETCD_DIR}" --listen-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --log-level=debug 2> "${ETCD_LOGFILE}" >/dev/null & + kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=${ETCD_LOGLEVEL} > \"${ETCD_LOGFILE}\" 2>/dev/null" + etcd --advertise-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --data-dir "${ETCD_DIR}" --listen-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --log-level="${ETCD_LOGLEVEL}" 2> "${ETCD_LOGFILE}" >/dev/null & ETCD_PID=$! echo "Waiting for etcd to come up." 
diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md index 536ce951354..5a6ea8d10ef 100644 --- a/test/integration/scheduler_perf/README.md +++ b/test/integration/scheduler_perf/README.md @@ -36,14 +36,14 @@ How To Run ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=true -logtostderr=true -run=." KUBE_TIMEOUT="--timeout=60m" SHORT="--short=false" +make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=true -logtostderr=true -run=." KUBE_TIMEOUT="--timeout=60m" SHORT="--short=false" ``` ## Benchmark tests ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling" +make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling" ``` The benchmark suite runs all the tests specified under config/performance-config.yaml. 
@@ -58,14 +58,14 @@ Otherwise, the golang benchmark framework will try to run a test more than once ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingBasic/5000Nodes/5000InitPods/1000PodsToSchedule" +make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingBasic/5000Nodes/5000InitPods/1000PodsToSchedule" ``` To produce a cpu profile: ```shell # In Kubernetes root path -make test-integration WHAT=./test/integration/scheduler_perf KUBE_TIMEOUT="-timeout=3600s" KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -cpuprofile ~/cpu-profile.out" +make test-integration WHAT=./test/integration/scheduler_perf KUBE_TIMEOUT="-timeout=3600s" ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-alsologtostderr=false -logtostderr=false -run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -cpuprofile ~/cpu-profile.out" ``` ### How to configure benchmark tests From 535d885c291a8e6e9c245407128c1765c36bfaef Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 13:38:07 +0100 Subject: [PATCH 3/6] test: fix info message for etcd startup The output redirection in the info message did not match the actual invocation (stdout and stderr swapped). 
--- hack/lib/etcd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/lib/etcd.sh b/hack/lib/etcd.sh index 185420d5427..4df9c3fc442 100755 --- a/hack/lib/etcd.sh +++ b/hack/lib/etcd.sh @@ -86,7 +86,7 @@ kube::etcd::start() { else ETCD_LOGFILE=${ETCD_LOGFILE:-"/dev/null"} fi - kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=${ETCD_LOGLEVEL} > \"${ETCD_LOGFILE}\" 2>/dev/null" + kube::log::info "etcd --advertise-client-urls ${KUBE_INTEGRATION_ETCD_URL} --data-dir ${ETCD_DIR} --listen-client-urls http://${ETCD_HOST}:${ETCD_PORT} --log-level=${ETCD_LOGLEVEL} 2> \"${ETCD_LOGFILE}\" >/dev/null" etcd --advertise-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --data-dir "${ETCD_DIR}" --listen-client-urls "${KUBE_INTEGRATION_ETCD_URL}" --log-level="${ETCD_LOGLEVEL}" 2> "${ETCD_LOGFILE}" >/dev/null & ETCD_PID=$! From 8d44b819b3fa0b0d82731078769edf1824830619 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 20:04:07 +0100 Subject: [PATCH 4/6] scheduler_perf: avoid ambiguous test names "-bench=PerfScheduling/Preemption/500Nodes" ran both the PerfScheduling/Preemption/500Nodes and the PerfScheduling/PreemptionPVs/500Nodes benchmark. This can be avoided by choosing names where none is the prefix of another. 
--- test/integration/scheduler_perf/config/performance-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml index 20d79dcaeb4..ff75a1b3107 100644 --- a/test/integration/scheduler_perf/config/performance-config.yaml +++ b/test/integration/scheduler_perf/config/performance-config.yaml @@ -373,7 +373,7 @@ initPods: 2000 measurePods: 1000 -- name: Preemption +- name: PreemptionBasic workloadTemplate: - opcode: createNodes countParam: $initNodes From c62d7407c8a764da55c2248dfcd250a9c32e3e24 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 20 Jan 2022 20:08:02 +0100 Subject: [PATCH 5/6] scheduler_perf: dump test data when writing it failed Occasionally, writing as JSON failed because a NaN float couldn't be encoded. The extended log message helps understand where that comes from, for example: F0120 20:24:45.515745 511835 scheduler_perf_test.go:540] BenchmarkPerfScheduling: unable to write measured data {Version:v1 DataItems:[{Data:map[Average:35.714285714285715 Perc50:2 Perc90:36 Perc95:412 Perc99:412] Unit:pods/s Labels:map[Metric:SchedulingThroughput Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2]} {Data:map[Average:27.863967530999993 Perc50:13.925925925925926 Perc90:30.06711409395973 Perc95:31.85682326621924 Perc99:704] Unit:ms Labels:map[Metric:scheduler_e2e_scheduling_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2]} {Data:map[Average:11915.651577744 Perc50:15168.796680497926 Perc90:19417.759336099585 Perc95:19948.87966804979 Perc99:20373.77593360996] Unit:ms Labels:map[Metric:scheduler_pod_scheduling_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2]} {Data:map[Average:1.1865832049999983 Perc50:0.7636363636363637 Perc90:2.891903719912473 Perc95:3.066958424507659 Perc99:5.333333333333334] Unit:ms 
Labels:map[Metric:scheduler_framework_extension_point_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2 extension_point:Filter]} {Data:map[Average:NaN Perc50:NaN Perc90:NaN Perc95:NaN Perc99:NaN] Unit:ms Labels:map[Metric:scheduler_framework_extension_point_duration_seconds Name:BenchmarkPerfScheduling/PreemptionPVs/500Nodes/namespace-2 extension_point:Score]}]}: json: unsupported value: NaN --- test/integration/scheduler_perf/scheduler_perf_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/scheduler_perf/scheduler_perf_test.go b/test/integration/scheduler_perf/scheduler_perf_test.go index a594502dd0d..66bcba96837 100644 --- a/test/integration/scheduler_perf/scheduler_perf_test.go +++ b/test/integration/scheduler_perf/scheduler_perf_test.go @@ -537,7 +537,7 @@ func BenchmarkPerfScheduling(b *testing.B) { }) } if err := dataItems2JSONFile(dataItems, b.Name()); err != nil { - klog.Fatalf("%v: unable to write measured data: %v", b.Name(), err) + klog.Fatalf("%v: unable to write measured data %+v: %v", b.Name(), dataItems, err) } } From e1e84c8e5f56d0f6e709e418839662200243ec90 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 7 Feb 2022 09:12:10 +0100 Subject: [PATCH 6/6] scheduler_perf: run with -v=0 by default This provides a mechanism for overriding the forced increase of the klog verbosity to 4 when starting the apiserver and uses that for the scheduler_perf benchmark. Other tests run as before. A global variable was used because adding an explicit parameter to several helper functions would have caused a lot of code churn (test -> integration/util.StartApiserver -> integration/framework.RunAnAPIServerUsingServer -> integration/framework.startAPIServerOrDie). 
--- test/integration/framework/controlplane_utils.go | 13 ++++++++++--- test/integration/scheduler_perf/util.go | 3 +++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/test/integration/framework/controlplane_utils.go b/test/integration/framework/controlplane_utils.go index 8b1421f8f12..a476611459a 100644 --- a/test/integration/framework/controlplane_utils.go +++ b/test/integration/framework/controlplane_utils.go @@ -65,6 +65,13 @@ const ( UnprivilegedUserToken = "unprivileged-user" ) +// MinVerbosity determines the minimum klog verbosity when running tests that +// involve the apiserver. This overrides the -v value from the command line, +// i.e. -v=0 has no effect when MinVerbosity is 4 (the default). Tests can opt +// out of this by setting MinVerbosity to zero before starting the control +// plane or choose some different minimum verbosity. +var MinVerbosity = 4 + // Config is a struct of configuration directives for NewControlPlaneComponents. type Config struct { // If nil, a default is used, partially filled configs will not get populated. 
@@ -139,11 +146,11 @@ func startAPIServerOrDie(controlPlaneConfig *controlplane.Config, incomingServer var m *controlplane.Instance var s *httptest.Server - // Ensure we log at least level 4 + // Ensure we log at least at the desired level v := flag.Lookup("v").Value level, _ := strconv.Atoi(v.String()) - if level < 4 { - v.Set("4") + if level < MinVerbosity { + v.Set(strconv.Itoa(MinVerbosity)) } if incomingServer != nil { diff --git a/test/integration/scheduler_perf/util.go b/test/integration/scheduler_perf/util.go index 18f2f7cb8eb..e2168986f57 100644 --- a/test/integration/scheduler_perf/util.go +++ b/test/integration/scheduler_perf/util.go @@ -44,6 +44,7 @@ import ( "k8s.io/kube-scheduler/config/v1beta2" "k8s.io/kubernetes/pkg/scheduler/apis/config" kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme" + "k8s.io/kubernetes/test/integration/framework" "k8s.io/kubernetes/test/integration/util" testutils "k8s.io/kubernetes/test/utils" ) @@ -75,6 +76,8 @@ func newDefaultComponentConfig() (*config.KubeSchedulerConfiguration, error) { // Notes on rate limiter: // - client rate limit is set to 5000. func mustSetupScheduler(config *config.KubeSchedulerConfiguration) (util.ShutdownFunc, coreinformers.PodInformer, clientset.Interface, dynamic.Interface) { + // Run API server with minimal logging by default. Can be raised with -v. framework.MinVerbosity = 0 apiURL, apiShutdown := util.StartApiserver() var err error