From d0e3fc3561b4a71c63efe011259eb9e7aa4c5179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Skocze=C5=84?= Date: Thu, 22 Aug 2024 09:07:03 +0000 Subject: [PATCH 1/2] Set scheduling throughput thresholds in scheduler_perf tests --- .../config/performance-config.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml index f41ebe34461..06625a9ac39 100644 --- a/test/integration/scheduler_perf/config/performance-config.yaml +++ b/test/integration/scheduler_perf/config/performance-config.yaml @@ -48,6 +48,7 @@ measurePods: 1000 - name: 5000Nodes_10000Pods labels: [performance] + threshold: 270 params: initNodes: 5000 initPods: 1000 @@ -89,6 +90,7 @@ measurePods: 1000 - name: 5000Nodes_2000Pods labels: [performance] + threshold: 70 params: initNodes: 5000 initPods: 1000 @@ -125,6 +127,7 @@ measurePods: 1000 - name: 5000Nodes_10000Pods labels: [performance] + threshold: 260 params: initNodes: 5000 initPods: 1000 @@ -164,6 +167,7 @@ measurePods: 1000 - name: 5000Nodes_2000Pods labels: [performance] + threshold: 90 params: initNodes: 5000 initPods: 1000 @@ -212,6 +216,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 35 params: initNodes: 5000 initPods: 5000 @@ -258,6 +263,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 48 params: initNodes: 5000 initPods: 5000 @@ -303,6 +309,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 35 params: initNodes: 5000 initPods: 5000 @@ -344,6 +351,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 90 params: initNodes: 5000 initPods: 5000 @@ -385,6 +393,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 90 params: initNodes: 5000 initPods: 5000 @@ -418,6 +427,7 @@ measurePods: 10 - name: 15000Nodes labels: [performance, 
fast] + threshold: 390 params: initNodes: 15000 measurePods: 30000 @@ -457,6 +467,7 @@ measurePods: 1000 - name: 5000Nodes_10000Pods labels: [performance] + threshold: 220 params: initNodes: 5000 initPods: 5000 @@ -498,6 +509,7 @@ measurePods: 2000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 85 params: initNodes: 5000 initPods: 5000 @@ -539,6 +551,7 @@ measurePods: 2000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 125 params: initNodes: 5000 initPods: 5000 @@ -599,6 +612,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 140 params: initNodes: 5000 initPods: 2000 @@ -624,6 +638,7 @@ measurePods: 5 - name: 500Nodes labels: [performance, fast] + threshold: 18 params: initNodes: 500 initPods: 2000 @@ -659,6 +674,7 @@ measurePods: 5 - name: 500Nodes labels: [performance, fast] + threshold: 18 params: initNodes: 500 initPods: 2000 @@ -705,6 +721,7 @@ measurePods: 5000 - name: 5000Nodes/200InitPods/10000Pods labels: [performance] + threshold: 300 params: initNodes: 5000 initPods: 200 @@ -749,6 +766,7 @@ measurePods: 2000 - name: 5000Nodes_10000Pods labels: [performance] + threshold: 265 params: initNodes: 5000 measurePods: 10000 @@ -800,6 +818,7 @@ measurePods: 1000 - name: 5000Nodes_2000Pods labels: [performance] + threshold: 35 params: initNodes: 6000 initPodsPerNamespace: 40 @@ -853,6 +872,7 @@ measurePods: 1000 - name: 5000Nodes_2000Pods labels: [performance] + threshold: 55 params: initNodes: 5000 initPodsPerNamespace: 40 @@ -909,6 +929,7 @@ measurePods: 1000 - name: 5000Nodes_2000Pods labels: [performance] + threshold: 35 params: initNodes: 5000 initPodsPerNamespace: 50 @@ -962,6 +983,7 @@ measurePods: 1000 - name: 5000Nodes_5000Pods labels: [performance] + threshold: 90 params: initNodes: 5000 initPodsPerNamespace: 50 @@ -996,6 +1018,7 @@ measurePods: 400 - name: 5000Nodes labels: [performance, fast] + threshold: 68 params: taintNodes: 1000 normalNodes: 4000 @@ -1326,6 +1349,7 @@ measurePods: 100 
- name: 1Node_10000GatedPods labels: [performance, fast] + threshold: 130 params: gatedPods: 10000 deletingPods: 20000 @@ -1358,6 +1382,7 @@ measurePods: 10 - name: 1Node_10000GatedPods labels: [performance, fast] + threshold: 110 params: gatedPods: 10000 measurePods: 20000 From 48a8cb2bc50c16c4ae93d9c2c44328d592c374f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Skocze=C5=84?= Date: Fri, 23 Aug 2024 12:49:27 +0000 Subject: [PATCH 2/2] Document throughput thresholds in scheduler_perf readme --- test/integration/scheduler_perf/README.md | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md index 287ed486244..5ca8474fb33 100644 --- a/test/integration/scheduler_perf/README.md +++ b/test/integration/scheduler_perf/README.md @@ -129,3 +129,38 @@ The test cases labeled as `short` are executed in pull-kubernetes-integration jo | ci-kubernetes-integration-master | integration-test | | pull-kubernetes-integration | integration-test,short | | ci-benchmark-scheduler-perf | performance | + +## Scheduling throughput thresholds + +Thresholds are used to capture scheduler performance regressions in the periodic ci-benchmark-scheduler-perf job. +Most test cases have a threshold set for the largest `performance` workloads. +By default, these are defined for the `Average` statistic of the `SchedulingThroughput` metric. +It is possible to use another metric by configuring `thresholdMetricSelector` per test case or workload. + +### How to calculate the threshold + +The initial values for scheduling throughput thresholds were calculated through an analysis of historical data, +specifically focusing on the minimum, average, and standard deviation values for each workload +(see [#126871](https://github.com/kubernetes/kubernetes/pull/126871)). 
+Our goal is to set the thresholds somewhat pessimistically to minimize flakiness, +so it's recommended to set the threshold slightly below the observed historical minimum. +Depending on the variability of the data, the threshold can be lowered further. + +Thresholds should be adjusted based on the flakiness level and minima observed in the future. +Remember to set the value for newly added test cases as well, +but after collecting some data on workload characteristics. + +### How to determine the failed workload + +When the workload's scheduling throughput doesn't exceed the threshold, +the ci-benchmark-scheduler-perf periodic job will fail with an error log such as: + +``` +--- FAIL: BenchmarkPerfScheduling/SchedulingBasic/5000Nodes_10000Pods + ... + scheduler_perf.go:1098: ERROR: op 2: expected SchedulingThroughput Average to be higher: got 256.12, want 270 +``` + +This makes it possible to identify which workload failed. Make sure that the failure is not an outlier +by checking multiple runs of the job. If the failures are not related to any regression, +but to an incorrect threshold setting, it is reasonable to decrease it.