From d85b91f343bf209ff48cbd8d03f31118cce43e29 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 5 May 2023 17:35:57 +0200
Subject: [PATCH] scheduler-perf: measure workload runtime and relabel workloads

The goal is to label only those workloads as "performance" which actually
run long enough to provide useful metrics. The throughput collector samples
once per second, so a workload should run for at least 5 seconds, better 10,
to gather at least a minimal number of samples for the percentile
calculation.

For benchstat analysis of runs with enough repetitions to get statistically
meaningful results, each workload shouldn't run for more than one minute;
otherwise before/after analysis becomes too slow.

The labels were chosen based on benchmark runs on a reasonably fast desktop.

To know how long each workload takes, a new "runtime_seconds" benchmark
result gets added.
---
 .../config/performance-config.yaml           | 51 ++++++++++++++++---
 .../scheduler_perf/scheduler_perf_test.go    |  9 ++++
 2 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml
index 2ba1a518782..7bccae671ef 100644
--- a/test/integration/scheduler_perf/config/performance-config.yaml
+++ b/test/integration/scheduler_perf/config/performance-config.yaml
@@ -1,5 +1,4 @@
 - name: SchedulingBasic
-  labels: [performance]
   defaultPodTemplatePath: config/pod-default.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -17,13 +16,13 @@
       initPods: 500
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 1000
       measurePods: 1000
 
 - name: SchedulingPodAntiAffinity
-  labels: [performance]
   defaultPodTemplatePath: config/pod-with-pod-anti-affinity.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -46,13 +45,13 @@
       initPods: 100
       measurePods: 400
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 1000
       measurePods: 1000
 
 - name: SchedulingSecrets
-  labels: [performance]
   defaultPodTemplatePath: config/pod-with-secret-volume.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -70,6 +69,7 @@
       initPods: 500
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 5000
@@ -173,7 +173,6 @@
       measurePods: 1000
 
 - name: SchedulingPodAffinity
-  labels: [performance]
   defaultPodTemplatePath: config/pod-with-pod-affinity.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -200,6 +199,7 @@
       initPods: 500
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 5000
@@ -235,7 +235,6 @@
       measurePods: 1000
 
 - name: SchedulingPreferredPodAntiAffinity
-  labels: [performance]
   defaultPodTemplatePath: config/pod-with-preferred-pod-affinity.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -258,13 +257,13 @@
       initPods: 500
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 5000
       measurePods: 1000
 
 - name: SchedulingNodeAffinity
-  labels: [performance]
   defaultPodTemplatePath: config/pod-with-node-affinity.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -286,13 +285,13 @@
       initPods: 500
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 5000
       measurePods: 1000
 
 - name: TopologySpreading
-  labels: [performance]
   workloadTemplate:
   - opcode: createNodes
     countParam: $initNodes
@@ -315,6 +314,7 @@
       initPods: 1000
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 5000
@@ -411,6 +411,7 @@
     collectMetrics: true
   workloads:
   - name: 500Nodes
+    labels: [fast]
     params:
       initNodes: 500
       initPods: 2000
@@ -455,7 +456,6 @@
 #      measurePods: 5000
 
 - name: Unschedulable
-  labels: [performance]
   workloadTemplate:
   - opcode: createNodes
     countParam: $initNodes
@@ -475,6 +475,7 @@
       initPods: 200
       measurePods: 1000
   - name: 5000Nodes/200InitPods
+    labels: [performance, fast]
     params:
       initNodes: 5000
       initPods: 200
@@ -508,11 +509,13 @@
       initNodes: 1000
       measurePods: 1000
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       initNodes: 5000
       measurePods: 2000
 
 - name: SchedulingRequiredPodAntiAffinityWithNSSelector
+  labels: [performance]
   defaultPodTemplatePath: config/pod-anti-affinity-ns-selector.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -536,6 +539,13 @@
     collectMetrics: true
     namespace: measure-ns-0
   workloads:
+  - name: 500Nodes
+    labels: [fast]
+    params:
+      initNodes: 500
+      initPodsPerNamespace: 4
+      initNamespaces: 10
+      measurePods: 100
   - name: 5000Nodes
     params:
       initNodes: 5000
@@ -544,6 +554,7 @@
       measurePods: 1000
 
 - name: SchedulingPreferredAntiAffinityWithNSSelector
+  labels: [performance]
   defaultPodTemplatePath: config/pod-preferred-anti-affinity-ns-selector.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -567,6 +578,13 @@
     collectMetrics: true
     namespace: measure-ns-0
   workloads:
+  - name: 500Nodes
+    labels: [fast]
+    params:
+      initNodes: 500
+      initPodsPerNamespace: 4
+      initNamespaces: 10
+      measurePods: 100
   - name: 5000Nodes
     params:
       initNodes: 5000
@@ -575,6 +593,7 @@
       measurePods: 1000
 
 - name: SchedulingRequiredPodAffinityWithNSSelector
+  labels: [performance]
   defaultPodTemplatePath: config/pod-affinity-ns-selector.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -601,6 +620,13 @@
     collectMetrics: true
     namespace: measure-ns-0
   workloads:
+  - name: 500Nodes
+    labels: [fast]
+    params:
+      initNodes: 500
+      initPodsPerNamespace: 4
+      initNamespaces: 10
+      measurePods: 100
   - name: 5000Nodes
     params:
       initNodes: 5000
@@ -609,6 +635,7 @@
       measurePods: 1000
 
 - name: SchedulingPreferredAffinityWithNSSelector
+  labels: [performance]
   defaultPodTemplatePath: config/pod-preferred-affinity-ns-selector.yaml
   workloadTemplate:
   - opcode: createNodes
@@ -632,6 +659,13 @@
     collectMetrics: true
     namespace: measure-ns-0
   workloads:
+  - name: 500Nodes
+    labels: [fast]
+    params:
+      initNodes: 500
+      initPodsPerNamespace: 4
+      initNamespaces: 10
+      measurePods: 100
   - name: 5000Nodes
     params:
       initNodes: 5000
@@ -660,6 +694,7 @@
       normalNodes: 400
       measurePods: 400
   - name: 5000Nodes
+    labels: [performance, fast]
     params:
       taintNodes: 1000
       normalNodes: 4000
diff --git a/test/integration/scheduler_perf/scheduler_perf_test.go b/test/integration/scheduler_perf/scheduler_perf_test.go
index 429783c349a..4143890595f 100644
--- a/test/integration/scheduler_perf/scheduler_perf_test.go
+++ b/test/integration/scheduler_perf/scheduler_perf_test.go
@@ -748,6 +748,15 @@ func unrollWorkloadTemplate(b *testing.B, wt []op, w *workload) []op {
 }
 
 func runWorkload(ctx context.Context, b *testing.B, tc *testCase, w *workload) []DataItem {
+	start := time.Now()
+	b.Cleanup(func() {
+		duration := time.Now().Sub(start)
+		// This includes startup and shutdown time and thus does not
+		// reflect scheduling performance. It's useful to get a feeling
+		// for how long each workload runs overall.
+		b.ReportMetric(duration.Seconds(), "runtime_seconds")
+	})
+
 	var cfg *config.KubeSchedulerConfiguration
 	var err error
 	if tc.SchedulerConfigPath != nil {
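
Note on the new metric: runtime_seconds relies only on standard testing.B
facilities (Cleanup and ReportMetric), so the pattern can be reproduced
outside the scheduler_perf harness. The sketch below is illustrative only;
the benchmark name and loop body are made up and not part of the Kubernetes
tree.

package example

import (
	"testing"
	"time"
)

// BenchmarkWorkloadRuntime mirrors the pattern the patch adds to runWorkload:
// record the start time up front, then report total wall-clock runtime as a
// custom benchmark metric when the benchmark cleans up.
func BenchmarkWorkloadRuntime(b *testing.B) {
	start := time.Now()
	b.Cleanup(func() {
		// As in the patched runWorkload, this includes setup and teardown,
		// so it describes overall duration, not scheduling throughput.
		b.ReportMetric(time.Since(start).Seconds(), "runtime_seconds")
	})

	for i := 0; i < b.N; i++ {
		// Stand-in for the actual workload being measured.
		time.Sleep(time.Millisecond)
	}
}

When run with "go test -bench", the custom metric shows up as an extra
"runtime_seconds" column in the benchmark output next to ns/op, which is the
value the commit message refers to when classifying workloads as fast or
performance.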