From d0e3fc3561b4a71c63efe011259eb9e7aa4c5179 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20Skocze=C5=84?= <mskoczen@google.com>
Date: Thu, 22 Aug 2024 09:07:03 +0000
Subject: [PATCH 1/2] Set scheduling throughput thresholds in scheduler_perf
 tests

---
 .../config/performance-config.yaml            | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml
index f41ebe34461..06625a9ac39 100644
--- a/test/integration/scheduler_perf/config/performance-config.yaml
+++ b/test/integration/scheduler_perf/config/performance-config.yaml
@@ -48,6 +48,7 @@
       measurePods: 1000
   - name: 5000Nodes_10000Pods
     labels: [performance]
+    threshold: 270
     params:
       initNodes: 5000
       initPods: 1000
@@ -89,6 +90,7 @@
       measurePods: 1000
   - name: 5000Nodes_2000Pods
     labels: [performance]
+    threshold: 70
     params:
       initNodes: 5000
       initPods: 1000
@@ -125,6 +127,7 @@
       measurePods: 1000
   - name: 5000Nodes_10000Pods
     labels: [performance]
+    threshold: 260
     params:
       initNodes: 5000
       initPods: 1000
@@ -164,6 +167,7 @@
       measurePods: 1000
   - name: 5000Nodes_2000Pods
     labels: [performance]
+    threshold: 90
     params:
       initNodes: 5000
       initPods: 1000
@@ -212,6 +216,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 35
     params:
       initNodes: 5000
       initPods: 5000
@@ -258,6 +263,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 48
     params:
       initNodes: 5000
       initPods: 5000
@@ -303,6 +309,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 35
     params:
       initNodes: 5000
       initPods: 5000
@@ -344,6 +351,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 90
     params:
       initNodes: 5000
       initPods: 5000
@@ -385,6 +393,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 90
     params:
       initNodes: 5000
       initPods: 5000
@@ -418,6 +427,7 @@
       measurePods: 10
   - name: 15000Nodes
     labels: [performance, fast]
+    threshold: 390
     params:
       initNodes: 15000
       measurePods: 30000
@@ -457,6 +467,7 @@
       measurePods: 1000
   - name: 5000Nodes_10000Pods
     labels: [performance]
+    threshold: 220
     params:
       initNodes: 5000
       initPods: 5000
@@ -498,6 +509,7 @@
       measurePods: 2000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 85
     params:
       initNodes: 5000
       initPods: 5000
@@ -539,6 +551,7 @@
       measurePods: 2000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 125
     params:
       initNodes: 5000
       initPods: 5000
@@ -599,6 +612,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 140
     params:
       initNodes: 5000
       initPods: 2000
@@ -624,6 +638,7 @@
       measurePods: 5
   - name: 500Nodes
     labels: [performance, fast]
+    threshold: 18
     params:
       initNodes: 500
       initPods: 2000
@@ -659,6 +674,7 @@
       measurePods: 5
   - name: 500Nodes
     labels: [performance, fast]
+    threshold: 18
     params:
       initNodes: 500
       initPods: 2000
@@ -705,6 +721,7 @@
       measurePods: 5000
   - name: 5000Nodes/200InitPods/10000Pods
     labels: [performance]
+    threshold: 300
     params:
       initNodes: 5000
       initPods: 200
@@ -749,6 +766,7 @@
       measurePods: 2000
   - name: 5000Nodes_10000Pods
     labels: [performance]
+    threshold: 265
     params:
       initNodes: 5000
       measurePods: 10000
@@ -800,6 +818,7 @@
       measurePods: 1000
   - name: 5000Nodes_2000Pods
     labels: [performance]
+    threshold: 35
     params:
       initNodes: 6000
       initPodsPerNamespace: 40
@@ -853,6 +872,7 @@
       measurePods: 1000
   - name: 5000Nodes_2000Pods
     labels: [performance]
+    threshold: 55
     params:
       initNodes: 5000
       initPodsPerNamespace: 40
@@ -909,6 +929,7 @@
       measurePods: 1000
   - name: 5000Nodes_2000Pods
     labels: [performance]
+    threshold: 35
     params:
       initNodes: 5000
       initPodsPerNamespace: 50
@@ -962,6 +983,7 @@
       measurePods: 1000
   - name: 5000Nodes_5000Pods
     labels: [performance]
+    threshold: 90
     params:
       initNodes: 5000
       initPodsPerNamespace: 50
@@ -996,6 +1018,7 @@
       measurePods: 400
   - name: 5000Nodes
     labels: [performance, fast]
+    threshold: 68
     params:
       taintNodes: 1000
       normalNodes: 4000
@@ -1326,6 +1349,7 @@
       measurePods: 100
   - name: 1Node_10000GatedPods
     labels: [performance, fast]
+    threshold: 130
     params:
       gatedPods: 10000
       deletingPods: 20000
@@ -1358,6 +1382,7 @@
       measurePods: 10
   - name: 1Node_10000GatedPods
     labels: [performance, fast]
+    threshold: 110
     params:
       gatedPods: 10000
       measurePods: 20000

From 48a8cb2bc50c16c4ae93d9c2c44328d592c374f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20Skocze=C5=84?= <mskoczen@google.com>
Date: Fri, 23 Aug 2024 12:49:27 +0000
Subject: [PATCH 2/2] Document throughput thresholds in scheduler_perf readme

---
 test/integration/scheduler_perf/README.md | 35 +++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/test/integration/scheduler_perf/README.md b/test/integration/scheduler_perf/README.md
index 287ed486244..5ca8474fb33 100644
--- a/test/integration/scheduler_perf/README.md
+++ b/test/integration/scheduler_perf/README.md
@@ -129,3 +129,38 @@ The test cases labeled as `short` are executed in pull-kubernetes-integration jo
 | ci-kubernetes-integration-master | integration-test       |
 | pull-kubernetes-integration      | integration-test,short |
 | ci-benchmark-scheduler-perf      | performance            |
+
+## Scheduling throughput thresholds
+
+Thresholds are used to capture scheduler performance regressions in a periodic ci-benchmark-scheduler-perf job. 
+Most test cases have a threshold set for the largest `performance` workloads. 
+By default, these are defined for the `Average` statistic of the `SchedulingThroughput` metric. 
+It is possible to use another metric by configuring `thresholdMetricSelector` per test case or workload.
+
+### How to calculate the threshold
+
+The initial values for scheduling throughput thresholds were calculated through an analysis of historical data, 
+specifically focusing on the minimum, average, and standard deviation values for each workload 
+(see [#126871](https://github.com/kubernetes/kubernetes/pull/126871)). 
+Our goal is to set the thresholds somewhat pessimistically to minimize flakiness, 
+so it's recommended to set the threshold slightly below the observed historical minimum. 
+Depending on the variability of the data, the threshold can be lowered further.
+
+Thresholds should be adjusted based on the flakiness level and minima observed in the future. 
+Remember to set a threshold for newly added test cases as well,
+but only after collecting some data on workload characteristics.
+
+### How to determine the failed workload
+
+When the workload's scheduling throughput doesn't exceed the threshold, 
+the ci-benchmark-scheduler-perf periodic job will fail with an error log such as:
+
+```
+--- FAIL: BenchmarkPerfScheduling/SchedulingBasic/5000Nodes_10000Pods
+    ...
+    scheduler_perf.go:1098: ERROR: op 2: expected SchedulingThroughput Average to be higher: got 256.12, want 270
+```
+
+This lets you identify which workload failed. Make sure that the failure is not an outlier
+by checking multiple runs of the job. If the failures are not related to any regression,
+but to an incorrect threshold setting, it is reasonable to lower the threshold.