Merge pull request #128411 from macsko/split_scheduler_perf_tests

Split scheduler_perf config into subdirectories

Commit: 57438d0b8f
@@ -224,23 +224,23 @@ dependencies:
     match: registry.k8s.io\/pause:\d+\.\d+
   - path: test/integration/benchmark-controller.json
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-default.yaml
+  - path: test/integration/scheduler_perf/templates/pod-default.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-node-affinity.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-node-affinity.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-pod-affinity.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-pod-affinity.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-pod-anti-affinity.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-pod-anti-affinity.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-preferred-pod-affinity.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-preferred-pod-affinity.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-preferred-pod-anti-affinity.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-preferred-pod-anti-affinity.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-preferred-topology-spreading.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-preferred-topology-spreading.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-secret-volume.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-secret-volume.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
-  - path: test/integration/scheduler_perf/config/templates/pod-with-topology-spreading.yaml
+  - path: test/integration/scheduler_perf/templates/pod-with-topology-spreading.yaml
     match: registry.k8s.io\/pause:\d+\.\d+
   - path: test/utils/image/manifest.go
     match: configs\[Pause\] = Config{list\.GcRegistry, "pause", "\d+\.\d+(.\d+)?"}
@@ -33,10 +33,10 @@ Currently the test suite has the following:
 
 ```shell
 # In Kubernetes root path
-make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling"
+make test-integration WHAT=./test/integration/scheduler_perf/... ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling"
 ```
 
-The benchmark suite runs all the tests specified under config/performance-config.yaml.
+The benchmark suite runs all the tests specified under subdirectories split by topic (`<topic>/performance-config.yaml`).
 By default, it runs all workloads that have the "performance" label. In the configuration,
 labels can be added to a test case and/or individual workloads. Each workload also has
 all labels of its test case. The `perf-scheduling-label-filter` command line flag can
@@ -46,11 +46,12 @@ a comma-separated list of label names. Each label may have a `+` or `-` as prefix
 be set. For example, this runs all performance benchmarks except those that are labeled
 as "integration-test":
 ```shell
-make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -perf-scheduling-label-filter=performance,-integration-test"
+make test-integration WHAT=./test/integration/scheduler_perf/... ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -perf-scheduling-label-filter=performance,-integration-test"
 ```
 
-Once the benchmark is finished, JSON file with metrics is available in the current directory (test/integration/scheduler_perf). Look for `BenchmarkPerfScheduling_benchmark_YYYY-MM-DDTHH:MM:SSZ.json`.
-You can use `-data-items-dir` to generate the metrics file elsewhere.
+Once the benchmark is finished, JSON files with metrics are available in the subdirectories (`test/integration/scheduler_perf/config/<topic>`).
+Look for `BenchmarkPerfScheduling_benchmark_YYYY-MM-DDTHH:MM:SSZ.json`.
+You can use `-data-items-dir` to generate the metrics files elsewhere.
 
 In case you want to run a specific test in the suite, you can specify the test through `-bench` flag:
 
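For illustration only (not part of this diff), `-data-items-dir` can be combined with the invocation above; the output directory below is an arbitrary example:

```shell
# In Kubernetes root path; write the JSON metrics files into /tmp/scheduler-perf
# instead of the per-topic subdirectories (the path is illustrative).
make test-integration WHAT=./test/integration/scheduler_perf/... ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -data-items-dir=/tmp/scheduler-perf"
```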
@@ -59,19 +60,19 @@ Otherwise, the golang benchmark framework will try to run a test more than once
 
 ```shell
 # In Kubernetes root path
-make test-integration WHAT=./test/integration/scheduler_perf ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingBasic/5000Nodes/5000InitPods/1000PodsToSchedule"
+make test-integration WHAT=./test/integration/scheduler_perf/... ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingBasic/5000Nodes/5000InitPods/1000PodsToSchedule"
 ```
 
 To produce a cpu profile:
 
 ```shell
 # In Kubernetes root path
-make test-integration WHAT=./test/integration/scheduler_perf KUBE_TIMEOUT="-timeout=3600s" ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -cpuprofile ~/cpu-profile.out"
+make test-integration WHAT=./test/integration/scheduler_perf/... KUBE_TIMEOUT="-timeout=3600s" ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling -cpuprofile ~/cpu-profile.out"
 ```
 
 ### How to configure benchmark tests
 
-Configuration file located under `config/performance-config.yaml` contains a list of templates.
+Configuration files located under `<topic>/performance-config.yaml` contain a list of templates.
 Each template allows to set:
 - node manifest
 - manifests for initial and testing pod
@@ -85,7 +86,7 @@ for available operations to build `WorkloadTemplate`.
 Initial pods create a state of a cluster before the scheduler performance measurement can begin.
 Testing pods are then subject to performance measurement.
 
-The configuration file under `config/performance-config.yaml` contains a default list of templates to cover
+The configuration files under `<topic>/performance-config.yaml` contain a default list of templates to cover
 various scenarios. In case you want to add your own, you can extend the list with new templates.
 It's also possible to extend `op` data type, respectively its underlying data types
 to extend configuration of possible test cases.
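To make the template structure concrete, here is a hypothetical minimal entry modeled on the real test cases added later in this diff; the case name, counts and labels are invented for illustration only:

```yaml
# Hypothetical minimal template: create $initNodes nodes, then schedule and measure $measurePods pods.
- name: ExampleSchedulingCase
  defaultPodTemplatePath: ../templates/pod-default.yaml
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
  - opcode: createPods
    countParam: $measurePods
    collectMetrics: true
  workloads:
  - name: 500Nodes
    labels: [performance, short]
    params:
      initNodes: 500
      measurePods: 1000
```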
@@ -115,10 +116,10 @@ removes that file only if the test passed.
 
 To run integration tests, use:
 ```
-make test-integration WHAT=./test/integration/scheduler_perf KUBE_TEST_ARGS=-use-testing-log
+make test-integration WHAT=./test/integration/scheduler_perf/... KUBE_TEST_ARGS=-use-testing-log
 ```
 
-Integration testing uses the same `config/performance-config.yaml` as
+Integration testing uses the same configs (`<topic>/performance-config.yaml`) as
 benchmarking. By default, workloads labeled as `integration-test`
 are executed as part of integration testing (in ci-kubernetes-integration-master job).
 `-test-scheduling-label-filter` can be used to change that.
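A hedged sketch (not part of this diff) of overriding that default locally, reusing the invocation above and the comma-separated label-filter syntax described earlier:

```shell
# Run only workloads labeled both integration-test and short (illustrative filter value).
make test-integration WHAT=./test/integration/scheduler_perf/... KUBE_TEST_ARGS="-use-testing-log -test-scheduling-label-filter=integration-test,short"
```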
@@ -139,7 +140,7 @@ The test cases labeled as `short` are executed in pull-kubernetes-integration job
 | pull-kubernetes-integration | integration-test,short |
 | ci-benchmark-scheduler-perf | performance |
 
-See the comment on [./config/performance-config.yaml](./config/performance-config.yaml) for the details.
+See the comment on [./misc/performance-config.yaml](./misc/performance-config.yaml) for the details.
 
 ## Scheduling throughput thresholds
 
@@ -182,15 +183,15 @@ Some support for visualizing progress over time is built into the
 benchmarks. The measurement operation which creates pods writes .dat files like
 this:
 
-    test/integration/scheduler_perf/SchedulingBasic_5000Nodes_2023-03-17T14:52:09Z.dat
+    test/integration/scheduler_perf/misc/SchedulingBasic_5000Nodes_2023-03-17T14:52:09Z.dat
 
 This file is in a text format that [gnuplot](http://www.gnuplot.info/) can
 read. A wrapper script selects some suitable parameters:
 
-    test/integration/scheduler_perf/gnuplot.sh test/integration/scheduler_perf/*.dat
+    test/integration/scheduler_perf/gnuplot.sh test/integration/scheduler_perf/*/*.dat
 
 It plots in an interactive window by default. To write into a file, use
 
     test/integration/scheduler_perf/gnuplot.sh \
        -e 'set term png; set output "<output>.png"' \
-       test/integration/scheduler_perf/*.dat
+       test/integration/scheduler_perf/*/*.dat
test/integration/scheduler_perf/affinity/affinity_test.go (new file, 43 lines)
@@ -0,0 +1,43 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package affinity

import (
	"fmt"
	"os"
	"testing"

	_ "k8s.io/component-base/logs/json/register"
	perf "k8s.io/kubernetes/test/integration/scheduler_perf"
)

func TestMain(m *testing.M) {
	if err := perf.InitTests(); err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}

	m.Run()
}

func TestSchedulerPerf(t *testing.T) {
	perf.RunIntegrationPerfScheduling(t, "performance-config.yaml")
}

func BenchmarkPerfScheduling(b *testing.B) {
	perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "affinity", nil)
}
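Since each topic is now its own Go package, a single topic can be exercised in isolation. A sketch (not part of this commit) applying the README's `make test-integration` pattern to just this package:

```shell
# In Kubernetes root path; run only the affinity benchmarks (illustrative invocation).
make test-integration WHAT=./test/integration/scheduler_perf/affinity ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling"
```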
test/integration/scheduler_perf/affinity/performance-config.yaml (new file, 542 lines)
@@ -0,0 +1,542 @@
# The following labels are used in this file. (listed in ascending order of the number of covered test cases)
#
# - integration-test: test cases to run as the integration test, usually to spot some issues in the scheduler implementation or scheduler-perf itself.
# - performance: test cases to run in the performance test.
# - short: supplemental label for the above two labels (must not be used alone), which literally means short execution time test cases.
#
# Specifically, the CIs use labels like the following:
# - `ci-kubernetes-integration-master` (`integration-test`): Test cases are chosen based on a tradeoff between code coverage and overall runtime.
#   It basically covers all test cases but with their smallest workload.
# - `pull-kubernetes-integration` (`integration-test`,`short`): Test cases are chosen so that they should take less than total 5 min to complete.
# - `ci-benchmark-scheduler-perf` (`performance`): Long enough test cases are chosen (ideally, longer than 10 seconds)
#   to provide meaningful samples for the pod scheduling rate.
#
# Also, `performance`+`short` isn't used in the CIs, but it's used to test the performance test locally.
# (Sometimes, the test cases with `integration-test` are too small to spot issues.)
#
# Combining `performance` and `short` selects suitable workloads for local
# before/after comparisons with benchstat.

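# A hedged local-workflow sketch (not part of this file): run the `performance`+`short`
# workloads before and after a scheduler change, saving the Go benchmark output of each run
# (e.g. with `-perf-scheduling-label-filter=performance,short ... > before.txt` and `... > after.txt`),
# then compare the two files with benchstat (golang.org/x/perf/cmd/benchstat):
#   benchstat before.txt after.txt
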
- name: SchedulingPodAntiAffinity
|
||||
defaultPodTemplatePath: ../templates/pod-with-pod-anti-affinity.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
- opcode: createNamespaces
|
||||
prefix: sched
|
||||
count: 2
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
namespace: sched-1
|
||||
workloads:
|
||||
- name: 5Nodes
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
initNodes: 5
|
||||
initPods: 1
|
||||
measurePods: 4
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPods: 100
|
||||
measurePods: 400
|
||||
- name: 5000Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 1000
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_2000Pods
|
||||
labels: [performance]
|
||||
threshold: 70
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 1000
|
||||
measurePods: 2000
|
||||
|
||||
- name: SchedulingPodAffinity
|
||||
defaultPodTemplatePath: ../templates/pod-with-pod-affinity.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
nodeTemplatePath: ../templates/node-default.yaml
|
||||
labelNodePrepareStrategy:
|
||||
labelKey: "topology.kubernetes.io/zone"
|
||||
labelValues: ["zone1"]
|
||||
- opcode: createNamespaces
|
||||
prefix: sched
|
||||
count: 2
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
namespace: sched-1
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: 5Nodes
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
initNodes: 5
|
||||
initPods: 5
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPods: 500
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_5000Pods
|
||||
labels: [performance]
|
||||
threshold: 35
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 5000
|
||||
|
||||
- name: SchedulingPreferredPodAffinity
|
||||
defaultPodTemplatePath: ../templates/pod-with-preferred-pod-affinity.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
- opcode: createNamespaces
|
||||
prefix: sched
|
||||
count: 2
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
namespace: sched-1
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: 5Nodes
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
initNodes: 5
|
||||
initPods: 5
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPods: 500
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes
|
||||
labels: [performance]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_5000Pods
|
||||
labels: [performance]
|
||||
threshold: 90
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 5000
|
||||
|
||||
- name: SchedulingPreferredPodAntiAffinity
|
||||
defaultPodTemplatePath: ../templates/pod-with-preferred-pod-affinity.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
- opcode: createNamespaces
|
||||
prefix: sched
|
||||
count: 2
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
namespace: sched-1
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: 5Nodes
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
initNodes: 5
|
||||
initPods: 5
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPods: 500
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_5000Pods
|
||||
labels: [performance]
|
||||
threshold: 90
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 5000
|
||||
|
||||
- name: SchedulingNodeAffinity
|
||||
defaultPodTemplatePath: ../templates/pod-with-node-affinity.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
nodeTemplatePath: ../templates/node-default.yaml
|
||||
labelNodePrepareStrategy:
|
||||
labelKey: "topology.kubernetes.io/zone"
|
||||
labelValues: ["zone1"]
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: 5Nodes
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
initNodes: 5
|
||||
initPods: 5
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPods: 500
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_10000Pods
|
||||
labels: [performance]
|
||||
threshold: 220
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 5000
|
||||
measurePods: 10000
|
||||
|
||||
- name: MixedSchedulingBasePod
|
||||
defaultPodTemplatePath: ../templates/pod-default.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
nodeTemplatePath: ../templates/node-default.yaml
|
||||
labelNodePrepareStrategy:
|
||||
labelKey: "topology.kubernetes.io/zone"
|
||||
labelValues: ["zone1"]
|
||||
- opcode: createNamespaces
|
||||
prefix: sched
|
||||
count: 1
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
podTemplatePath: ../templates/pod-with-pod-affinity.yaml
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
podTemplatePath: ../templates/pod-with-pod-anti-affinity.yaml
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
podTemplatePath: ../templates/pod-with-preferred-pod-affinity.yaml
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $initPods
|
||||
podTemplatePath: ../templates/pod-with-preferred-pod-anti-affinity.yaml
|
||||
namespace: sched-0
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: 5Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 5
|
||||
initPods: 2
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPods: 200
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes
|
||||
labels: [performance]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 2000
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_5000Pods
|
||||
labels: [performance]
|
||||
threshold: 140
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPods: 2000
|
||||
measurePods: 5000
|
||||
|
||||
- name: SchedulingRequiredPodAntiAffinityWithNSSelector
|
||||
defaultPodTemplatePath: ../templates/pod-anti-affinity-ns-selector.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
- opcode: createNamespaces
|
||||
prefix: init-ns
|
||||
countParam: $initNamespaces
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createNamespaces
|
||||
prefix: measure-ns
|
||||
count: 1
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createPodSets
|
||||
countParam: $initNamespaces
|
||||
namespacePrefix: init-ns
|
||||
createPodsOp:
|
||||
opcode: createPods
|
||||
countParam: $initPodsPerNamespace
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
namespace: measure-ns-0
|
||||
workloads:
|
||||
- name: 10Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 10
|
||||
initPodsPerNamespace: 2
|
||||
initNamespaces: 2
|
||||
measurePods: 6
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPodsPerNamespace: 4
|
||||
initNamespaces: 10
|
||||
measurePods: 100
|
||||
- name: 5000Nodes
|
||||
labels: [performance]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 40
|
||||
initNamespaces: 100
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_2000Pods
|
||||
labels: [performance]
|
||||
threshold: 24
|
||||
params:
|
||||
initNodes: 6000
|
||||
initPodsPerNamespace: 40
|
||||
initNamespaces: 100
|
||||
measurePods: 2000
|
||||
|
||||
- name: SchedulingPreferredAntiAffinityWithNSSelector
|
||||
defaultPodTemplatePath: ../templates/pod-preferred-anti-affinity-ns-selector.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
- opcode: createNamespaces
|
||||
prefix: init-ns
|
||||
countParam: $initNamespaces
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createNamespaces
|
||||
prefix: measure-ns
|
||||
count: 1
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createPodSets
|
||||
countParam: $initNamespaces
|
||||
namespacePrefix: init-ns
|
||||
createPodsOp:
|
||||
opcode: createPods
|
||||
countParam: $initPodsPerNamespace
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
namespace: measure-ns-0
|
||||
workloads:
|
||||
- name: 10Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 10
|
||||
initPodsPerNamespace: 2
|
||||
initNamespaces: 2
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPodsPerNamespace: 4
|
||||
initNamespaces: 10
|
||||
measurePods: 100
|
||||
- name: 5000Nodes
|
||||
labels: [performance]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 40
|
||||
initNamespaces: 100
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_2000Pods
|
||||
labels: [performance]
|
||||
threshold: 55
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 40
|
||||
initNamespaces: 100
|
||||
measurePods: 2000
|
||||
|
||||
- name: SchedulingRequiredPodAffinityWithNSSelector
|
||||
defaultPodTemplatePath: ../templates/pod-affinity-ns-selector.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
labelNodePrepareStrategy:
|
||||
labelKey: "topology.kubernetes.io/zone"
|
||||
labelValues: ["zone1"]
|
||||
- opcode: createNamespaces
|
||||
prefix: init-ns
|
||||
countParam: $initNamespaces
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createNamespaces
|
||||
prefix: measure-ns
|
||||
count: 1
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createPodSets
|
||||
countParam: $initNamespaces
|
||||
namespacePrefix: init-ns
|
||||
createPodsOp:
|
||||
opcode: createPods
|
||||
countParam: $initPodsPerNamespace
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
namespace: measure-ns-0
|
||||
workloads:
|
||||
- name: 10Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 10
|
||||
initPodsPerNamespace: 2
|
||||
initNamespaces: 2
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPodsPerNamespace: 4
|
||||
initNamespaces: 10
|
||||
measurePods: 100
|
||||
- name: 5000Nodes
|
||||
labels: [performance]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 50
|
||||
initNamespaces: 100
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_2000Pods
|
||||
labels: [performance]
|
||||
threshold: 35
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 50
|
||||
initNamespaces: 100
|
||||
measurePods: 2000
|
||||
|
||||
- name: SchedulingPreferredAffinityWithNSSelector
|
||||
defaultPodTemplatePath: ../templates/pod-preferred-affinity-ns-selector.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
- opcode: createNamespaces
|
||||
prefix: init-ns
|
||||
countParam: $initNamespaces
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createNamespaces
|
||||
prefix: measure-ns
|
||||
count: 1
|
||||
namespaceTemplatePath: ../templates/namespace-with-labels.yaml
|
||||
- opcode: createPodSets
|
||||
countParam: $initNamespaces
|
||||
namespacePrefix: init-ns
|
||||
createPodsOp:
|
||||
opcode: createPods
|
||||
countParam: $initPodsPerNamespace
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
namespace: measure-ns-0
|
||||
workloads:
|
||||
- name: 10Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 10
|
||||
initPodsPerNamespace: 2
|
||||
initNamespaces: 2
|
||||
measurePods: 10
|
||||
- name: 500Nodes
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 500
|
||||
initPodsPerNamespace: 4
|
||||
initNamespaces: 10
|
||||
measurePods: 100
|
||||
- name: 5000Nodes
|
||||
labels: [performance]
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 50
|
||||
initNamespaces: 100
|
||||
measurePods: 1000
|
||||
- name: 5000Nodes_5000Pods
|
||||
labels: [performance]
|
||||
threshold: 90
|
||||
params:
|
||||
initNodes: 5000
|
||||
initPodsPerNamespace: 50
|
||||
initNamespaces: 100
|
||||
measurePods: 5000
|
||||
|
||||
- name: SchedulingGatedPodsWithPodAffinityImpactForThroughput
|
||||
defaultPodTemplatePath: ../templates/pod-with-label.yaml
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
count: 1
|
||||
nodeTemplatePath: ../templates/node-with-name.yaml
|
||||
- opcode: createPods
|
||||
countParam: $gatedPods
|
||||
podTemplatePath: ../templates/gated-pod-with-pod-affinity.yaml
|
||||
skipWaitToCompletion: true
|
||||
- opcode: barrier
|
||||
stageRequirement: Attempted
|
||||
- opcode: createPods
|
||||
# The scheduling of those Pods will result in many cluster events (AssignedPodAdded)
|
||||
# and each of them will be processed by the scheduling queue.
|
||||
# But, the scheduling throughput should only be minimally impacted by the number of gated Pods.
|
||||
countParam: $measurePods
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: 1Node_10GatedPods
|
||||
labels: [performance, short]
|
||||
params:
|
||||
gatedPods: 10
|
||||
measurePods: 10
|
||||
- name: 1Node_10000GatedPods
|
||||
labels: [performance, short]
|
||||
threshold: 110
|
||||
params:
|
||||
gatedPods: 10000
|
||||
measurePods: 20000
|
File diff suppressed because it is too large
test/integration/scheduler_perf/dra/OWNERS (new file, 8 lines)
@@ -0,0 +1,8 @@
# See the OWNERS docs at https://go.k8s.io/owners

reviewers:
  - bart0sh
  - klueska
  - pohly
labels:
  - wg/device-management
test/integration/scheduler_perf/dra/dra_test.go (new file, 43 lines)
@@ -0,0 +1,43 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
	"fmt"
	"os"
	"testing"

	_ "k8s.io/component-base/logs/json/register"
	perf "k8s.io/kubernetes/test/integration/scheduler_perf"
)

func TestMain(m *testing.M) {
	if err := perf.InitTests(); err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}

	m.Run()
}

func TestSchedulerPerf(t *testing.T) {
	perf.RunIntegrationPerfScheduling(t, "performance-config.yaml")
}

func BenchmarkPerfScheduling(b *testing.B) {
	perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "dra", nil)
}
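As with the other topics, the DRA cases can be run on their own. A sketch (not part of this commit) following the README's pattern, selecting one of the test cases defined in the config below:

```shell
# In Kubernetes root path; run only the DRA ResourceClaimTemplate benchmark (illustrative invocation).
make test-integration WHAT=./test/integration/scheduler_perf/dra ETCD_LOGLEVEL=warn KUBE_TEST_VMODULE="''" KUBE_TEST_ARGS="-run=^$$ -benchtime=1ns -bench=BenchmarkPerfScheduling/SchedulingWithResourceClaimTemplate"
```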
test/integration/scheduler_perf/dra/performance-config.yaml (new file, 278 lines)
@@ -0,0 +1,278 @@
# The following labels are used in this file. (listed in ascending order of the number of covered test cases)
#
# - integration-test: test cases to run as the integration test, usually to spot some issues in the scheduler implementation or scheduler-perf itself.
# - performance: test cases to run in the performance test.
# - short: supplemental label for the above two labels (must not be used alone), which literally means short execution time test cases.
#
# Specifically, the CIs use labels like the following:
# - `ci-kubernetes-integration-master` (`integration-test`): Test cases are chosen based on a tradeoff between code coverage and overall runtime.
#   It basically covers all test cases but with their smallest workload.
# - `pull-kubernetes-integration` (`integration-test`,`short`): Test cases are chosen so that they should take less than total 5 min to complete.
# - `ci-benchmark-scheduler-perf` (`performance`): Long enough test cases are chosen (ideally, longer than 10 seconds)
#   to provide meaningful samples for the pod scheduling rate.
#
# Also, `performance`+`short` isn't used in the CIs, but it's used to test the performance test locally.
# (Sometimes, the test cases with `integration-test` are too small to spot issues.)
#
# Combining `performance` and `short` selects suitable workloads for local
# before/after comparisons with benchstat.

# SchedulingWithResourceClaimTemplate uses a ResourceClaimTemplate
# and dynamically creates ResourceClaim instances for each pod. Node, pod and
# device counts are chosen so that the cluster gets filled up completely.
- name: SchedulingWithResourceClaimTemplate
|
||||
featureGates:
|
||||
DynamicResourceAllocation: true
|
||||
# SchedulerQueueingHints: true
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $nodesWithoutDRA
|
||||
- opcode: createNodes
|
||||
nodeTemplatePath: templates/node-with-dra-test-driver.yaml
|
||||
countParam: $nodesWithDRA
|
||||
- opcode: createResourceDriver
|
||||
driverName: test-driver.cdi.k8s.io
|
||||
nodes: scheduler-perf-dra-*
|
||||
maxClaimsPerNodeParam: $maxClaimsPerNode
|
||||
- opcode: createAny
|
||||
templatePath: templates/deviceclass.yaml
|
||||
- opcode: createAny
|
||||
templatePath: templates/resourceclaimtemplate.yaml
|
||||
namespace: init
|
||||
- opcode: createPods
|
||||
namespace: init
|
||||
countParam: $initPods
|
||||
podTemplatePath: templates/pod-with-claim-template.yaml
|
||||
- opcode: createAny
|
||||
templatePath: templates/resourceclaimtemplate.yaml
|
||||
namespace: test
|
||||
- opcode: createPods
|
||||
namespace: test
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/pod-with-claim-template.yaml
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: fast
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
# This testcase runs through all code paths without
|
||||
# taking too long overall.
|
||||
nodesWithDRA: 1
|
||||
nodesWithoutDRA: 1
|
||||
initPods: 0
|
||||
measurePods: 10
|
||||
maxClaimsPerNode: 10
|
||||
- name: 2000pods_100nodes
|
||||
params:
|
||||
nodesWithDRA: 100
|
||||
nodesWithoutDRA: 0
|
||||
initPods: 1000
|
||||
measurePods: 1000
|
||||
maxClaimsPerNode: 20
|
||||
- name: 2000pods_200nodes
|
||||
params:
|
||||
nodesWithDRA: 200
|
||||
nodesWithoutDRA: 0
|
||||
initPods: 1000
|
||||
measurePods: 1000
|
||||
maxClaimsPerNode: 10
|
||||
- name: 5000pods_500nodes
|
||||
params:
|
||||
nodesWithDRA: 500
|
||||
nodesWithoutDRA: 0
|
||||
initPods: 2500
|
||||
measurePods: 2500
|
||||
maxClaimsPerNode: 10
|
||||
|
||||
# SteadyStateResourceClaimTemplate uses a ResourceClaimTemplate and
|
||||
# dynamically creates ResourceClaim instances for each pod. It creates ten
|
||||
# pods, waits for them to be scheduled, deletes them, and starts again,
|
||||
# so the cluster remains at the same level of utilization.
|
||||
#
|
||||
# The number of already allocated claims can be varied, thus simulating
|
||||
# various degrees of pre-existing resource utilization.
|
||||
- name: SteadyStateClusterResourceClaimTemplate
|
||||
featureGates:
|
||||
DynamicResourceAllocation: true
|
||||
# SchedulerQueueingHints: true
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $nodesWithoutDRA
|
||||
- opcode: createNodes
|
||||
nodeTemplatePath: templates/node-with-dra-test-driver.yaml
|
||||
countParam: $nodesWithDRA
|
||||
- opcode: createResourceDriver
|
||||
driverName: test-driver.cdi.k8s.io
|
||||
nodes: scheduler-perf-dra-*
|
||||
maxClaimsPerNodeParam: $maxClaimsPerNode
|
||||
- opcode: createAny
|
||||
templatePath: templates/deviceclass.yaml
|
||||
- opcode: createAny
|
||||
templatePath: templates/resourceclaim.yaml
|
||||
countParam: $initClaims
|
||||
namespace: init
|
||||
- opcode: allocResourceClaims
|
||||
namespace: init
|
||||
- opcode: createAny
|
||||
templatePath: templates/resourceclaimtemplate.yaml
|
||||
namespace: test
|
||||
- opcode: createPods
|
||||
namespace: test
|
||||
count: 10
|
||||
steadyState: true
|
||||
durationParam: $duration
|
||||
podTemplatePath: templates/pod-with-claim-template.yaml
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: fast
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
# This testcase runs through all code paths without
|
||||
# taking too long overall.
|
||||
nodesWithDRA: 1
|
||||
nodesWithoutDRA: 1
|
||||
initClaims: 0
|
||||
maxClaimsPerNode: 10
|
||||
duration: 2s
|
||||
- name: empty_100nodes
|
||||
params:
|
||||
nodesWithDRA: 100
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 0
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
- name: empty_200nodes
|
||||
params:
|
||||
nodesWithDRA: 200
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 0
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
- name: empty_500nodes
|
||||
params:
|
||||
nodesWithDRA: 500
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 0
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
# In the "half" scenarios, half of the devices are in use.
|
||||
- name: half_100nodes
|
||||
params:
|
||||
nodesWithDRA: 100
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 500
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
- name: half_200nodes
|
||||
params:
|
||||
nodesWithDRA: 200
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 1000
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
- name: half_500nodes
|
||||
params:
|
||||
nodesWithDRA: 500
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 2500
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
# In the "full" scenarios, the cluster can accommodate exactly 10 additional pods.
|
||||
- name: full_100nodes
|
||||
params:
|
||||
nodesWithDRA: 100
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 990
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
- name: full_200nodes
|
||||
params:
|
||||
nodesWithDRA: 200
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 1990
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
- name: full_500nodes
|
||||
params:
|
||||
nodesWithDRA: 500
|
||||
nodesWithoutDRA: 0
|
||||
initClaims: 4990
|
||||
maxClaimsPerNode: 10
|
||||
duration: 10s
|
||||
|
||||
# SchedulingWithResourceClaimTemplate uses ResourceClaims
|
||||
# with deterministic names that are shared between pods.
|
||||
# There is a fixed ratio of 1:5 between claims and pods.
|
||||
- name: SchedulingWithResourceClaim
|
||||
featureGates:
|
||||
DynamicResourceAllocation: true
|
||||
# SchedulerQueueingHints: true
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $nodesWithoutDRA
|
||||
- opcode: createNodes
|
||||
nodeTemplatePath: templates/node-with-dra-test-driver.yaml
|
||||
countParam: $nodesWithDRA
|
||||
- opcode: createResourceDriver
|
||||
driverName: test-driver.cdi.k8s.io
|
||||
nodes: scheduler-perf-dra-*
|
||||
maxClaimsPerNodeParam: $maxClaimsPerNode
|
||||
- opcode: createAny
|
||||
templatePath: templates/deviceclass.yaml
|
||||
- opcode: createAny
|
||||
templatePath: templates/resourceclaim.yaml
|
||||
namespace: init
|
||||
countParam: $initClaims
|
||||
- opcode: createPods
|
||||
namespace: init
|
||||
countParam: $initPods
|
||||
podTemplatePath: templates/pod-with-claim-ref.yaml
|
||||
- opcode: createAny
|
||||
templatePath: templates/resourceclaim.yaml
|
||||
namespace: test
|
||||
countParam: $measureClaims
|
||||
- opcode: createPods
|
||||
namespace: test
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/pod-with-claim-ref.yaml
|
||||
collectMetrics: true
|
||||
workloads:
|
||||
- name: fast
|
||||
labels: [integration-test, short]
|
||||
params:
|
||||
# This testcase runs through all code paths without
|
||||
# taking too long overall.
|
||||
nodesWithDRA: 1
|
||||
nodesWithoutDRA: 1
|
||||
initPods: 0
|
||||
initClaims: 0
|
||||
measurePods: 10
|
||||
measureClaims: 2 # must be measurePods / 5
|
||||
maxClaimsPerNode: 2
|
||||
- name: 2000pods_100nodes
|
||||
params:
|
||||
nodesWithDRA: 100
|
||||
nodesWithoutDRA: 0
|
||||
initPods: 1000
|
||||
initClaims: 200 # must be initPods / 5
|
||||
measurePods: 1000
|
||||
measureClaims: 200 # must be initPods / 5
|
||||
maxClaimsPerNode: 4
|
||||
- name: 2000pods_200nodes
|
||||
params:
|
||||
nodesWithDRA: 200
|
||||
nodesWithoutDRA: 0
|
||||
initPods: 1000
|
||||
initClaims: 200 # must be initPods / 5
|
||||
measurePods: 1000
|
||||
measureClaims: 200 # must be measurePods / 5
|
||||
maxClaimsPerNode: 2
|
||||
- name: 5000pods_500nodes
|
||||
params:
|
||||
nodesWithDRA: 500
|
||||
nodesWithoutDRA: 0
|
||||
initPods: 2500
|
||||
initClaims: 500 # must be initPods / 5
|
||||
measurePods: 2500
|
||||
measureClaims: 500 # must be measurePods / 5
|
||||
maxClaimsPerNode: 2
|
@@ -0,0 +1,43 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eventhandling

import (
	"fmt"
	"os"
	"testing"

	_ "k8s.io/component-base/logs/json/register"
	perf "k8s.io/kubernetes/test/integration/scheduler_perf"
)

func TestMain(m *testing.M) {
	if err := perf.InitTests(); err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}

	m.Run()
}

func TestSchedulerPerf(t *testing.T) {
	perf.RunIntegrationPerfScheduling(t, "performance-config.yaml")
}

func BenchmarkPerfScheduling(b *testing.B) {
	perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "eventhandling", nil)
}
@@ -0,0 +1,538 @@
# The following labels are used in this file. (listed in ascending order of the number of covered test cases)
#
# - integration-test: test cases to run as the integration test, usually to spot some issues in the scheduler implementation or scheduler-perf itself.
# - performance: test cases to run in the performance test.
# - short: supplemental label for the above two labels (must not be used alone), which literally means short execution time test cases.
#
# Specifically, the CIs use labels like the following:
# - `ci-kubernetes-integration-master` (`integration-test`): Test cases are chosen based on a tradeoff between code coverage and overall runtime.
#   It basically covers all test cases but with their smallest workload.
# - `pull-kubernetes-integration` (`integration-test`,`short`): Test cases are chosen so that they should take less than total 5 min to complete.
# - `ci-benchmark-scheduler-perf` (`performance`): Long enough test cases are chosen (ideally, longer than 10 seconds)
#   to provide meaningful samples for the pod scheduling rate.
#
# Also, `performance`+`short` isn't used in the CIs, but it's used to test the performance test locally.
# (Sometimes, the test cases with `integration-test` are too small to spot issues.)
#
# Combining `performance` and `short` selects suitable workloads for local
# before/after comparisons with benchstat.

# This test case is used to measure the performance of queuing hints when handling the AssignedPodDelete events.
# First, two groups of blocker pods are created, which will prevent other pods from being scheduled.
# Then multiple types of pods are created, and each group is filtered by a different plugin.
# Next, blocker pods are gradually deleted and previously unscheduled pods can be scheduled.
# Plugins covered: InterPodAffinity, NodePorts, NodeResources, NodeVolumeLimits, PodTopologySpread and VolumeRestrictions.
- name: EventHandlingPodDelete
|
||||
featureGates:
|
||||
SchedulerQueueingHints: true
|
||||
workloadTemplate:
|
||||
- opcode: createNodes
|
||||
countParam: $initNodes
|
||||
nodeTemplatePath: ../templates/node-default.yaml
|
||||
# Allow max 20 volumes per node.
|
||||
nodeAllocatableStrategy:
|
||||
nodeAllocatable:
|
||||
attachable-volumes-csi-ebs.csi.aws.com: "20"
|
||||
csiNodeAllocatable:
|
||||
ebs.csi.aws.com:
|
||||
count: 20
|
||||
# Create pods that will block other pods from being scheduled.
|
||||
# They'll block using NodePorts, NodeResources, NodeVolumeLimits and PodTopologySpread plugins.
|
||||
- opcode: createPods
|
||||
countParam: $blockerPods
|
||||
podTemplatePath: templates/poddelete-pod-blocker-topology-ports-resources.yaml
|
||||
persistentVolumeTemplatePath: ../templates/pv-csi.yaml
|
||||
persistentVolumeClaimTemplatePath: ../templates/pvc.yaml
|
||||
namespace: blockertopologyportsresources
|
||||
# Create second group of pods that will block another pods from being scheduled.
|
||||
# They'll block using InterPodAffinity and VolumeRestrictions plugins.
|
||||
- opcode: createPods
|
||||
countParam: $blockerPods
|
||||
podTemplatePath: templates/poddelete-pod-blocker-affinity.yaml
|
||||
persistentVolumeTemplatePath: ../templates/pv-csi.yaml
|
||||
persistentVolumeClaimTemplatePath: ../templates/pvc-once-pod.yaml
|
||||
namespace: blockeraffinity
|
||||
# Collect metrics from all createPods ops below.
|
||||
- opcode: startCollectingMetrics
|
||||
name: unschedPods
|
||||
namespaces: [blockertopologyportsresources, blockeraffinity, nodeports, noderesources, nodevolumelimits, interpodaffinity]
|
||||
labelSelector:
|
||||
type: unsched
|
||||
# Create pods blocked using PodTopologySpread plugin.
|
||||
# Note: for this plugin, namespace has to match the blocker's namespace,
|
||||
# so has to be "blockertopologyportsresources".
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/poddelete-pod-podtopologyspread.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: blockertopologyportsresources
|
||||
# Create pods blocked using VolumeRestrictions plugin.
|
||||
# Note: these pods uses PVCs and PVs created for second blocker pods,
|
||||
# so the count needs to be equal to $blockerPods
|
||||
# and namespace has to be "blockeraffinity".
|
||||
- opcode: createPods
|
||||
countParam: $blockerPods
|
||||
podTemplatePath: templates/poddelete-pod-volumerestrictions.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: blockeraffinity
|
||||
# Create pods blocked using NodePorts plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/poddelete-pod-nodeports.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: nodeports
|
||||
# Create pods blocked using NodeResources plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/poddelete-pod-noderesources.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: noderesources
|
||||
# Create pods blocked using NodeVolumeLimits plugin.
|
||||
- opcode: createPods
|
||||
countParam: $blockerPods
|
||||
podTemplatePath: templates/poddelete-pod-nodevolumelimits.yaml
|
||||
persistentVolumeTemplatePath: ../templates/pv-csi.yaml
|
||||
persistentVolumeClaimTemplatePath: ../templates/pvc.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: nodevolumelimits
|
||||
# Create pods blocked using InterPodAffinity plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/poddelete-pod-interpodaffinity.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: interpodaffinity
|
||||
# Wait for unschedulable pods to be processed by the scheduler.
|
||||
- opcode: barrier
|
||||
stageRequirement: Attempted
|
||||
labelSelector:
|
||||
type: unsched
|
||||
# Start deleting blocker pods.
|
||||
- opcode: deletePods
|
||||
deletePodsPerSecond: 100
|
||||
namespace: blockertopologyportsresources
|
||||
labelSelector:
|
||||
type: blocker
|
||||
skipWaitToCompletion: true
|
||||
- opcode: deletePods
|
||||
deletePodsPerSecond: 100
|
||||
namespace: blockeraffinity
|
||||
labelSelector:
|
||||
type: blocker
|
||||
skipWaitToCompletion: true
|
||||
# Wait for previously unschedulable pods to be scheduled.
|
||||
- opcode: barrier
|
||||
labelSelector:
|
||||
type: unsched
|
||||
- opcode: stopCollectingMetrics
|
||||
workloads:
|
||||
- name: 50Nodes_500Pods
|
||||
labels: [performance, short]
|
||||
params:
|
||||
initNodes: 50
|
||||
blockerPods: 480 # Must be slightly below initNodes * 10 to be stable
|
||||
measurePods: 500 # Must be initNodes * 10
|
||||
|
||||
# This test case is used to measure the performance of queuing hints when handling the pod update events:
|
||||
# UpdatePodLabel, UpdatePodScaleDown, UpdatePodTolerations and UpdatePodSchedulingGatesEliminated.
|
||||
# It has a few stages, but general idea is to make a node and block some pods on it
|
||||
# or to create additional blocker pods that will prevent the other ones from being scheduled.
|
||||
# Then, updating the blocker pods or the unschedulable pods themselves generate cluster events,
|
||||
# that through QHints make the pods schedulable.
|
||||
# Plugins covered: InterPodAffinity, NodeResources, NodeUnschedulable, PodTopologySpread, SchedulingGates and TaintToleration.
|
||||
- name: EventHandlingPodUpdate
|
||||
featureGates:
|
||||
SchedulerQueueingHints: true
|
||||
InPlacePodVerticalScaling: true
|
||||
workloadTemplate:
|
||||
# Collect metrics from all createPods ops that initially create unschedulable pods (type: unsched).
|
||||
- opcode: startCollectingMetrics
|
||||
namespaces: [nodeunschedulable, tainttoleration, blocker, interpodaffinity, noderesources, schedulinggates]
|
||||
labelSelector:
|
||||
type: unsched
|
||||
# Create one unschedulable node.
|
||||
- opcode: createNodes
|
||||
count: 1
|
||||
nodeTemplatePath: templates/podupdate-node-unschedulable.yaml
|
||||
# Created pods blocked using NodeUnschedulable plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/podupdate-pod-nodeunschedulable.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: nodeunschedulable
|
||||
# Create one node with NoSchedule taint.
|
||||
- opcode: createNodes
|
||||
count: 1
|
||||
nodeTemplatePath: templates/podupdate-node-with-taint.yaml
|
||||
# Created pods blocked using TaintToleration plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/podupdate-pod-tainttoleration.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: tainttoleration
|
||||
# Wait for unschedulable pods to be processed by the scheduler.
|
||||
- opcode: barrier
|
||||
stageRequirement: Attempted
|
||||
# Update pods blocked using NodeUnschedulable plugin to make them schedulable.
|
||||
- opcode: updateAny
|
||||
countParam: $measurePods
|
||||
templatePath: templates/podupdate-pod-nodeunschedulable-update.yaml
|
||||
updatePerSecond: 100
|
||||
namespace: nodeunschedulable
|
||||
# Update pods blocked using TaintToleration plugin to make them schedulable.
|
||||
- opcode: updateAny
|
||||
countParam: $measurePods
|
||||
templatePath: templates/podupdate-pod-tainttoleration-update.yaml
|
||||
updatePerSecond: 100
|
||||
namespace: tainttoleration
|
||||
# Wait for NodeUnschedulable and TaintToleration pods to be scheduled.
|
||||
- opcode: barrier
|
||||
# Create schedulable nodes.
|
||||
- opcode: createNodes
|
||||
count: 1
|
||||
nodeTemplatePath: ../templates/node-with-name.yaml
|
||||
# Create pods that will block other pods from being scheduled.
|
||||
# They'll block using InterPodAffinity, NodeResources and PodTopologySpread plugins.
|
||||
# All blocker pods are scheduled before proceeding.
|
||||
- opcode: createPods
|
||||
countParam: $blockerPods
|
||||
podTemplatePath: templates/podupdate-pod-blocker.yaml
|
||||
namespace: blocker
|
||||
# Created pods blocked using InterPodAffinity plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/podupdate-pod-interpodaffinity.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: interpodaffinity
|
||||
# Created pods blocked using NodeResources plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/podupdate-pod-noderesources.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: noderesources
|
||||
# Created pods blocked using PodTopologySpread plugin.
|
||||
# Count has to match $blockerPods as pod uses it as a maxSkew value,
|
||||
# that must be equal to number of blocker pods.
|
||||
- opcode: createPods
|
||||
countParam: $blockerPods
|
||||
podTemplatePath: templates/podupdate-pod-podtopologyspread.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: blocker
|
||||
# Created pods blocked using SchedulingGates plugin.
|
||||
- opcode: createPods
|
||||
countParam: $measurePods
|
||||
podTemplatePath: templates/podupdate-pod-schedulinggates.yaml
|
||||
skipWaitToCompletion: true
|
||||
namespace: schedulinggates
|
||||
# Wait for unschedulable pods to be processed by the scheduler.
|
||||
- opcode: barrier
|
||||
stageRequirement: Attempted
|
||||
labelSelector:
|
||||
type: unsched
|
||||
# Update blocker pods' labels and scale down their resource requests
|
||||
# to make the unschedulable pods schedulable.
|
||||
- opcode: updateAny
|
||||
countParam: $blockerPods
|
||||
templatePath: templates/podupdate-pod-blocker-update.yaml
|
||||
updatePerSecond: 100
|
||||
namespace: blocker
|
||||
# Update pods blocked by SchedulingGates by removing the gate from themselves.
|
||||
- opcode: updateAny
|
||||
countParam: $measurePods
|
||||
templatePath: templates/podupdate-pod-schedulinggates-update.yaml
|
||||
updatePerSecond: 100
|
||||
namespace: schedulinggates
# Wait for previously unschedulable pods to be scheduled.
- opcode: barrier
labelSelector:
type: unsched
- opcode: stopCollectingMetrics
workloads:
- name: 1Node_1000Pods
labels: [performance, short]
params:
blockerPods: 1000
measurePods: 1000

# This test case is used to measure the performance of queuing hints when handling NodeAdd events.
# First, an unschedulable node is created, which prevents any pod from being scheduled on it.
# Then multiple types of pods are created, and each group is filtered by a different plugin.
# Next, nodes are created on which the previously unschedulable pods can be scheduled.
# The test case is divided into several stages to make sure that the pods are filtered by a specific plugin.
# Plugins covered: InterPodAffinity, NodeAffinity, NodeResources, NodeUnschedulable, PodTopologySpread and TaintToleration.
- name: EventHandlingNodeAdd
featureGates:
SchedulerQueueingHints: true
workloadTemplate:
# Collect metrics from all createPods ops.
- opcode: startCollectingMetrics
name: unschedPods
namespaces: [nodeunschedulable, noderesources, interpodaffinity, nodeaffinity, podtopologyspread, tainttoleration]
# Create one unschedulable node.
- opcode: createNodes
count: 1
nodeTemplatePath: templates/nodeadd-node-unschedulable.yaml
# Create pods blocked using NodeUnschedulable plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeadd-pod-nodeunschedulable.yaml
skipWaitToCompletion: true
namespace: nodeunschedulable
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Create a schedulable node with low capacity.
- opcode: createNodes
count: 1
nodeTemplatePath: templates/nodeadd-node-low-capacity.yaml
# Create pods blocked using NodeResources plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeadd-pod-noderesources.yaml
skipWaitToCompletion: true
namespace: noderesources
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Create nodes that will have enough resource capacity for pods blocked by NodeResources plugin.
# These nodes will still block the next pods from being scheduled.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeadd-node-high-capacity.yaml
# Wait on barrier for NodeUnschedulable and NodeResources pods to be scheduled.
- opcode: barrier
# Create pods blocked using InterPodAffinity plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeadd-pod-interpodaffinity.yaml
skipWaitToCompletion: true
namespace: interpodaffinity
# Create pods blocked using NodeAffinity plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeadd-pod-nodeaffinity.yaml
skipWaitToCompletion: true
namespace: nodeaffinity
# Create pods blocked using PodTopologySpread plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeadd-pod-podtopologyspread.yaml
skipWaitToCompletion: true
namespace: podtopologyspread
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Create nodes that will unblock most of the unschedulable pods.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeadd-node-with-labels.yaml
# Wait on barrier for InterPodAffinity, NodeAffinity and PodTopologySpread pods to be scheduled.
- opcode: barrier
# Create pods blocked using TaintToleration plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeadd-pod-tainttoleration.yaml
skipWaitToCompletion: true
namespace: tainttoleration
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Create nodes that will unblock pods filtered out by TaintToleration plugin.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeadd-node-with-taint.yaml
# Wait on barrier for TaintToleration pods to be scheduled.
- opcode: barrier
- opcode: stopCollectingMetrics
workloads:
- name: 100Nodes_500Pods
labels: [performance, short]
params:
nodes: 100
measurePods: 1000 # Must be nodes * 10

# This test case is used to measure the performance of queuing hints when handling AssignedPodAdd events.
# First, two nodes are created. Then, one pod is created and scheduled on one of the nodes.
# Next, a group of topology spreading pods tries to be scheduled, but they can only fill one node,
# because of the anti-affinity to the pod on the second node.
# Then, a group of interpodaffinity pods is created and waits for pods with matching labels to be scheduled first.
# Next, new pods are scheduled that unblock the previously unschedulable pods, by balancing the topology
# and scheduling pods with labels matching the interpodaffinity pods.
# Plugins covered: InterPodAffinity and PodTopologySpread.
- name: EventHandlingPodAdd
featureGates:
SchedulerQueueingHints: true
workloadTemplate:
# Create two nodes with enough capacity.
- opcode: createNodes
count: 2
nodeTemplatePath: templates/podadd-node.yaml
# Create one pod with a label that will block topology spreading pods
# from being scheduled on one node, using pod anti-affinity.
- opcode: createPods
count: 1
podTemplatePath: templates/podadd-pod-with-label.yaml
namespace: podtopologyspread
# Collect metrics for unsched pods created below.
- opcode: startCollectingMetrics
name: unschedPods
namespaces: [podtopologyspread, interpodaffinity]
labelSelector:
type: unsched
# Create pods blocked using PodTopologySpread plugin.
# Max skew is configured to 1, so more pods need to be created on the first node
# (with the pod created above), to fill up the second node with these pods.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/podadd-pod-podtopologyspread.yaml
skipWaitToCompletion: true
namespace: podtopologyspread
# Create pods blocked using InterPodAffinity plugin.
# They don't match their own affinity terms,
# so they have to wait for other pods with matching labels to be created first.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/podadd-pod-interpodaffinity.yaml
skipWaitToCompletion: true
namespace: interpodaffinity
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
labelSelector:
type: unsched
# Create pods that will get scheduled on the node hosting the first pod created above with the matching label.
# Their creation will gradually unblock topology spreading pods and make them schedulable on the second node.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/podadd-pod-unblocker-topology.yaml
namespace: podtopologyspread
# Create pods with labels matching the affinity of the previously created interpodaffinity pods.
# Each of them will unblock one pod and make it schedulable.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/podadd-pod-unblocker-affinity.yaml
namespace: interpodaffinity
# Wait for previously unschedulable pods to be scheduled.
- opcode: barrier
labelSelector:
type: unsched
- opcode: stopCollectingMetrics
workloads:
- name: 1000Pods
labels: [performance, short]
params:
measurePods: 1000

# This test case is used to measure the performance of queuing hints when handling NodeUpdate events.
# First, a group of nodes is created that prevents any pod from being scheduled on them.
# Then, pods are created; each group is filtered by a specific plugin and is thus unschedulable.
# Next, the nodes are updated so that the previously unschedulable pods can be scheduled on them.
# The test case is divided into several stages to make sure that the pods are filtered by a specific plugin.
# Plugins covered: InterPodAffinity, NodeAffinity, NodeResources, NodeUnschedulable, PodTopologySpread and TaintToleration.
- name: EventHandlingNodeUpdate
featureGates:
SchedulerQueueingHints: true
workloadTemplate:
# Collect metrics from all createPods ops.
- opcode: startCollectingMetrics
name: unschedPods
namespaces: [nodeunschedulable, noderesources, interpodaffinity, nodeaffinity, podtopologyspread, tainttoleration]
# Create unschedulable nodes.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeupdate-node-unschedulable.yaml
# Create pods blocked using NodeUnschedulable plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeupdate-pod-nodeunschedulable.yaml
skipWaitToCompletion: true
namespace: nodeunschedulable
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Update nodes so they are not unschedulable anymore.
- opcode: updateAny
countParam: $nodes
templatePath: templates/nodeupdate-node-unschedulable-update.yaml
updatePerSecond: 100
# Wait on barrier for NodeUnschedulable pods to be scheduled.
- opcode: barrier
# Create nodes with low capacity.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeupdate-node-low-capacity.yaml
# Create pods blocked using NodeResources plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeupdate-pod-noderesources.yaml
skipWaitToCompletion: true
namespace: noderesources
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Update nodes to have enough resource capacity for pods blocked by NodeResources plugin.
# These nodes will still block the next pods from being scheduled.
- opcode: updateAny
countParam: $nodes
templatePath: templates/nodeupdate-node-low-capacity-update.yaml
updatePerSecond: 100
# Wait on barrier for NodeResources pods to be scheduled.
- opcode: barrier
# Create nodes without any labels.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeupdate-node-without-labels.yaml
# Create pods blocked using InterPodAffinity plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeupdate-pod-interpodaffinity.yaml
skipWaitToCompletion: true
namespace: interpodaffinity
# Create pods blocked using NodeAffinity plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeupdate-pod-nodeaffinity.yaml
skipWaitToCompletion: true
namespace: nodeaffinity
# Create pods blocked using PodTopologySpread plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeupdate-pod-podtopologyspread.yaml
skipWaitToCompletion: true
namespace: podtopologyspread
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Update nodes to have labels required by the pods above.
- opcode: updateAny
countParam: $nodes
templatePath: templates/nodeupdate-node-without-labels-update.yaml
updatePerSecond: 100
# Wait on barrier for InterPodAffinity, NodeAffinity and PodTopologySpread pods to be scheduled.
- opcode: barrier
# Create nodes with taints not matching the tolerations of pods below.
- opcode: createNodes
countParam: $nodes
nodeTemplatePath: templates/nodeupdate-node-without-taints.yaml
# Create pods blocked using TaintToleration plugin.
- opcode: createPods
countParam: $measurePods
podTemplatePath: templates/nodeupdate-pod-tainttoleration.yaml
skipWaitToCompletion: true
namespace: tainttoleration
# Wait for unschedulable pods to be processed by the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Update nodes to have taints tolerated by the pods above.
- opcode: updateAny
countParam: $nodes
templatePath: templates/nodeupdate-node-without-taints-update.yaml
updatePerSecond: 100
# Wait on barrier for TaintToleration pods to be scheduled.
- opcode: barrier
- opcode: stopCollectingMetrics
workloads:
- name: 100Nodes_1000Pods
labels: [performance, short]
params:
nodes: 100
measurePods: 1000 # Must be nodes * 10
@@ -1,67 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package benchmark

import (
"flag"
"fmt"
"os"
"strings"
"testing"

"k8s.io/component-base/logs"
logsapi "k8s.io/component-base/logs/api/v1"
_ "k8s.io/component-base/logs/json/register"
"k8s.io/kubernetes/test/utils/ktesting"
)

func TestMain(m *testing.M) {
// Run with -v=2, this is the default log level in production.
ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)

// test/integration/framework/flags.go unconditionally initializes the
// logging flags. That's correct for most tests, but in the
// scheduler_perf test we want more control over the flags, therefore
// here strip them out.
var fs flag.FlagSet
flag.CommandLine.VisitAll(func(f *flag.Flag) {
switch f.Name {
case "log-flush-frequency", "v", "vmodule":
// These will be added below ourselves, don't copy.
default:
fs.Var(f.Value, f.Name, f.Usage)
}
})
flag.CommandLine = &fs

flag.Var(LoggingFeatureGate, "feature-gate",
"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
"Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))

// This would fail if we hadn't removed the logging flags above.
logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)

flag.Parse()

logs.InitLogs()
if err := logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}

m.Run()
}
43
test/integration/scheduler_perf/misc/misc_test.go
Normal file
@@ -0,0 +1,43 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package misc

import (
"fmt"
"os"
"testing"

_ "k8s.io/component-base/logs/json/register"
perf "k8s.io/kubernetes/test/integration/scheduler_perf"
)

func TestMain(m *testing.M) {
if err := perf.InitTests(); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}

m.Run()
}

func TestSchedulerPerf(t *testing.T) {
perf.RunIntegrationPerfScheduling(t, "performance-config.yaml")
}

func BenchmarkPerfScheduling(b *testing.B) {
perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "misc", nil)
}
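The `misc` wrapper above is the pattern every topic subdirectory provides after the split. As a rough, hypothetical sketch (the directory and package names are assumptions for illustration; only `misc` is shown in this diff), a topic such as the event-handling scenarios earlier in this PR would presumably wire in the same way:

```go
// Hypothetical test/integration/scheduler_perf/event_handling/event_handling_test.go.
package eventhandling

import (
	"fmt"
	"os"
	"testing"

	_ "k8s.io/component-base/logs/json/register"
	perf "k8s.io/kubernetes/test/integration/scheduler_perf"
)

func TestMain(m *testing.M) {
	// InitTests sets up logging and the shared label-filter flags once per topic package.
	if err := perf.InitTests(); err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}
	m.Run()
}

func BenchmarkPerfScheduling(b *testing.B) {
	// The config path is resolved relative to the topic directory; the topic name
	// ("event_handling" here) only disambiguates the emitted result files.
	perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "event_handling", nil)
}
```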
442
test/integration/scheduler_perf/misc/performance-config.yaml
Normal file
@@ -0,0 +1,442 @@
# The following labels are used in this file. (listed in ascending order of the number of covered test cases)
#
# - integration-test: test cases to run as the integration test, usually to spot some issues in the scheduler implementation or scheduler-perf itself.
# - performance: test cases to run in the performance test.
# - short: supplemental label for the above two labels (must not be used alone), which marks test cases with short execution times.
#
# Specifically, the CIs use labels like the following:
# - `ci-kubernetes-integration-master` (`integration-test`): Test cases are chosen based on a tradeoff between code coverage and overall runtime.
# It basically covers all test cases but with their smallest workload.
# - `pull-kubernetes-integration` (`integration-test`,`short`): Test cases are chosen so that they should take less than 5 minutes in total to complete.
# - `ci-benchmark-scheduler-perf` (`performance`): Long enough test cases are chosen (ideally, longer than 10 seconds)
# to provide meaningful samples for the pod scheduling rate.
#
# Also, `performance`+`short` isn't used in the CIs, but it's used to test the performance test locally.
# (Sometimes, the test cases with `integration-test` are too small to spot issues.)
#
# Combining `performance` and `short` selects suitable workloads for local
# before/after comparisons with benchstat.

- name: SchedulingBasic
defaultPodTemplatePath: ../templates/pod-default.yaml
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
- opcode: createPods
countParam: $initPods
- opcode: createPods
countParam: $measurePods
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
initNodes: 5
initPods: 5
measurePods: 10
- name: 500Nodes
labels: [performance, short]
params:
initNodes: 500
initPods: 500
measurePods: 1000
- name: 5000Nodes
labels: [performance, short]
params:
initNodes: 5000
initPods: 1000
measurePods: 1000
- name: 5000Nodes_10000Pods
labels: [performance]
threshold: 270
params:
initNodes: 5000
initPods: 1000
measurePods: 10000

# This test case simulates the scheduling of a daemonset.
# https://github.com/kubernetes/kubernetes/issues/124709
- name: SchedulingDaemonset
defaultPodTemplatePath: ../templates/daemonset-pod.yaml
workloadTemplate:
# Create one node with a specific name (scheduler-perf-node),
# which is supposed to get all Pods created in this test case.
- opcode: createNodes
count: 1
nodeTemplatePath: ../templates/node-with-name.yaml
# Create other nodes that the scheduler has to filter out with PreFilterResult from NodeAffinity plugin.
- opcode: createNodes
countParam: $initNodes
nodeTemplatePath: ../templates/node-default.yaml
# Create pods with nodeAffinity (metadata.name=scheduler-perf-node).
# Although this scenario doesn't schedule one Pod per Node,
# the pods go through exactly the same scheduling process as daemonset pods do.
- opcode: createPods
countParam: $measurePods
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
initNodes: 5
measurePods: 10
- name: 15000Nodes
labels: [performance, short]
threshold: 390
params:
initNodes: 15000
measurePods: 30000

- name: TopologySpreading
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
nodeTemplatePath: ../templates/node-default.yaml
labelNodePrepareStrategy:
labelKey: "topology.kubernetes.io/zone"
labelValues: ["moon-1", "moon-2", "moon-3"]
- opcode: createPods
countParam: $initPods
podTemplatePath: ../templates/pod-default.yaml
- opcode: createPods
countParam: $measurePods
podTemplatePath: ../templates/pod-with-topology-spreading.yaml
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
initNodes: 5
initPods: 10
measurePods: 10
- name: 500Nodes
labels: [performance, short]
params:
initNodes: 500
initPods: 1000
measurePods: 1000
- name: 5000Nodes
labels: [performance, short]
params:
initNodes: 5000
initPods: 5000
measurePods: 2000
- name: 5000Nodes_5000Pods
labels: [performance]
threshold: 85
params:
initNodes: 5000
initPods: 5000
measurePods: 5000

- name: PreferredTopologySpreading
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
nodeTemplatePath: ../templates/node-default.yaml
labelNodePrepareStrategy:
labelKey: "topology.kubernetes.io/zone"
labelValues: ["moon-1", "moon-2", "moon-3"]
- opcode: createPods
countParam: $initPods
podTemplatePath: ../templates/pod-default.yaml
- opcode: createPods
countParam: $measurePods
podTemplatePath: ../templates/pod-with-preferred-topology-spreading.yaml
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
initNodes: 5
initPods: 10
measurePods: 10
- name: 500Nodes
labels: [performance, short]
params:
initNodes: 500
initPods: 1000
measurePods: 1000
- name: 5000Nodes
labels: [performance]
params:
initNodes: 5000
initPods: 5000
measurePods: 2000
- name: 5000Nodes_5000Pods
labels: [performance]
threshold: 125
params:
initNodes: 5000
initPods: 5000
measurePods: 5000

- name: PreemptionBasic
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
- opcode: createPods
countParam: $initPods
podTemplatePath: ../templates/pod-low-priority.yaml
- opcode: createPods
countParam: $measurePods
podTemplatePath: ../templates/pod-high-priority.yaml
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
initNodes: 5
initPods: 20
measurePods: 5
- name: 500Nodes
labels: [performance, short]
threshold: 18
params:
initNodes: 500
initPods: 2000
measurePods: 500
# This test case always seems to fail.
# https://github.com/kubernetes/kubernetes/issues/108308
#
# - name: 5000Nodes
# params:
# initNodes: 5000
# initPods: 20000
# measurePods: 5000

# Measure throughput of regular schedulable pods that are interleaved with high priority preempting pods created at a rate of 5/s.
# The asynchronous preemption feature (https://github.com/kubernetes/kubernetes/issues/126858) is supposed to increase the throughput of the measured pods, as all heavy operations (apiserver communication) are performed asynchronously, without blocking the scheduling loop.
# How this is achieved:
# 1. There are X initial nodes with 4 low priority pods each, consuming 3.6 CPU out of the total 4 available.
# 2. High priority preempting pods need to preempt 3 of the 4 low priority pods to fit (they require 3 CPU).
# 3. Measured pods are always schedulable, as they require 0.1 CPU only.
- name: PreemptionAsync
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
- opcode: createPods
countParam: $initPods
podTemplatePath: ../templates/pod-low-priority.yaml
- opcode: churn
mode: create
templatePaths:
- ../templates/pod-high-priority.yaml
intervalMilliseconds: 200
- opcode: createPods
countParam: $measurePods
podTemplatePath: ../templates/pod-default.yaml
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
initNodes: 5
initPods: 20
measurePods: 5
- name: 500Nodes
labels: [performance, short]
params:
initNodes: 500
initPods: 2000
measurePods: 500
- name: 5000Nodes
labels: [performance]
threshold: 200
params:
initNodes: 5000
initPods: 20000
measurePods: 5000

# Measure throughput of regular schedulable pods that are interleaved with unschedulable pods injected at a rate of 5/s.
- name: Unschedulable
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
- opcode: churn
mode: create
templatePaths:
- ../templates/pod-high-priority-large-cpu.yaml
intervalMilliseconds: 200
- opcode: createPods
countParam: $measurePods
podTemplatePath: ../templates/pod-default.yaml
collectMetrics: true
workloads:
- name: 5Nodes/10Pods
labels: [integration-test, short]
params:
initNodes: 5
measurePods: 10
- name: 500Nodes/1kPods
labels: [performance, short]
params:
initNodes: 500
measurePods: 1000
- name: 5kNodes/1kPods
labels: [performance, short]
params:
initNodes: 5000
measurePods: 1000
- name: 5kNodes/10kPods
labels: [performance]
threshold: 200
params:
initNodes: 5000
measurePods: 10000

- name: SchedulingWithMixedChurn
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
- opcode: churn
mode: recreate
number: 1
templatePaths:
- ../templates/churn/node-default.yaml
- ../templates/pod-high-priority-large-cpu.yaml
- ../templates/churn/service-default.yaml
intervalMilliseconds: 1000
- opcode: createPods
countParam: $measurePods
podTemplatePath: ../templates/pod-default.yaml
collectMetrics: true
workloads:
- name: 10Nodes
labels: [integration-test, short]
params:
initNodes: 10
measurePods: 100
- name: 1000Nodes
labels: [performance, short]
params:
initNodes: 1000
measurePods: 1000
- name: 5000Nodes
labels: [performance, short]
params:
initNodes: 5000
measurePods: 2000
- name: 5000Nodes_10000Pods
labels: [performance]
threshold: 265
params:
initNodes: 5000
measurePods: 10000

- name: SchedulingWithNodeInclusionPolicy
featureGates:
NodeInclusionPolicyInPodTopologySpread: true
defaultPodTemplatePath: ../templates/pod-with-node-inclusion-policy.yaml
workloadTemplate:
- opcode: createNodes
countParam: $normalNodes
- opcode: createNodes
nodeTemplatePath: ../templates/node-with-taint.yaml
countParam: $taintNodes
- opcode: createPods
countParam: $measurePods
collectMetrics: true
workloads:
- name: 5Nodes
labels: [integration-test, short]
params:
taintNodes: 1
normalNodes: 4
measurePods: 4
- name: 500Nodes
labels: [performance, short]
params:
taintNodes: 100
normalNodes: 400
measurePods: 400
- name: 5000Nodes
labels: [performance, short]
threshold: 68
params:
taintNodes: 1000
normalNodes: 4000
measurePods: 4000

# This test case simulates scheduling when many pods are gated and others are gradually deleted.
# https://github.com/kubernetes/kubernetes/issues/124384
- name: SchedulingWhileGated
defaultPodTemplatePath: ../templates/light-pod.yaml
workloadTemplate:
- opcode: createNodes
count: 1
nodeTemplatePath: ../templates/node-with-name.yaml
# Create pods that will stay gated until the end of the test.
- opcode: createPods
countParam: $gatedPods
podTemplatePath: ../templates/gated-pod.yaml
skipWaitToCompletion: true
# Wait to make sure gated pods are enqueued in the scheduler.
- opcode: barrier
stageRequirement: Attempted
# Create pods that will be gradually deleted after being scheduled.
- opcode: createPods
countParam: $deletingPods
# Delete scheduled pods, which will generate many AssignedPodDelete events.
# Each of them will be processed by the scheduling queue.
# The scheduling throughput should only be minimally impacted by the number of gated Pods.
- opcode: deletePods
namespace: namespace-3
deletePodsPerSecond: 50
skipWaitToCompletion: true
- opcode: createPods
countParam: $measurePods
collectMetrics: true
workloads:
- name: 1Node_10GatedPods
labels: [integration-test, short]
params:
gatedPods: 10
deletingPods: 10
measurePods: 10
- name: 1Node_10000GatedPods
labels: [performance, short]
threshold: 130
params:
gatedPods: 10000
deletingPods: 20000
measurePods: 20000

# This test case simulates scheduling when pods selected for scheduling have deletionTimestamp set.
# A memory leak related to this code path was fixed in:
# https://github.com/kubernetes/kubernetes/pull/126962
# The main goal of this test case is to verify that InFlightEvents is empty after the test.
- name: SchedulingDeletedPodsWithFinalizers
featureGates:
SchedulerQueueingHints: true
defaultPodTemplatePath: ../templates/light-pod.yaml
workloadTemplate:
- opcode: createNodes
countParam: $initNodes
- opcode: createPods
# Create pods with finalizers and delete them before they are scheduled.
# Their DeletionTimestamp field will then be populated,
# but a few of the pods should still be picked up for scheduling.
countParam: $deletingPods
podTemplatePath: ../templates/pod-with-finalizer.yaml
skipWaitToCompletion: true
- opcode: deletePods
namespace: namespace-1
deletePodsPerSecond: 100
skipWaitToCompletion: true
- opcode: createPods
countParam: $measurePods
collectMetrics: true
workloads:
- name: 10Node_100DeletingPods
labels: [integration-test, short]
params:
initNodes: 10
deletingPods: 10
measurePods: 10
- name: 1000Node_1000DeletingPods
labels: [performance, short]
params:
initNodes: 1000
deletingPods: 1000
measurePods: 1000
@@ -53,6 +53,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/component-base/featuregate"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/component-base/logs"
logsapi "k8s.io/component-base/logs/api/v1"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
@@ -104,7 +105,6 @@
)

const (
configFile = "config/performance-config.yaml"
extensionPointsLabelName = "extension_point"
resultLabelName = "result"
pluginLabelName = "plugin"
@@ -204,6 +204,47 @@ var (
}
)

var UseTestingLog *bool
var PerfSchedulingLabelFilter *string
var TestSchedulingLabelFilter *string

// InitTests should be called in a TestMain in each config subdirectory.
func InitTests() error {
// Run with -v=2, this is the default log level in production.
ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)

// test/integration/framework/flags.go unconditionally initializes the
// logging flags. That's correct for most tests, but in the
// scheduler_perf test we want more control over the flags, therefore
// here strip them out.
var fs flag.FlagSet
flag.CommandLine.VisitAll(func(f *flag.Flag) {
switch f.Name {
case "log-flush-frequency", "v", "vmodule":
// These will be added below ourselves, don't copy.
default:
fs.Var(f.Value, f.Name, f.Usage)
}
})
flag.CommandLine = &fs

flag.Var(LoggingFeatureGate, "feature-gate",
"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
"Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))

UseTestingLog = flag.Bool("use-testing-log", false, "Write log entries with testing.TB.Log. This is more suitable for unit testing and debugging, but less realistic in real benchmarks.")
PerfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
TestSchedulingLabelFilter = flag.String("test-scheduling-label-filter", "integration-test,-performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by TestScheduling")

// This would fail if we hadn't removed the logging flags above.
logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)

flag.Parse()

logs.InitLogs()
return logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate)
}

// testCase defines a set of test cases that intends to test the performance of
// similar workloads of varying sizes with shared overall settings such as
// feature gates and metrics collected.
@@ -930,11 +971,9 @@ func (scm stopCollectingMetricsOp) patchParams(_ *workload) (realOp, error) {
return &scm, nil
}

var useTestingLog = flag.Bool("use-testing-log", false, "Write log entries with testing.TB.Log. This is more suitable for unit testing and debugging, but less realistic in real benchmarks.")

func initTestOutput(tb testing.TB) io.Writer {
var output io.Writer
if *useTestingLog {
if *UseTestingLog {
output = framework.NewTBWriter(tb)
} else {
tmpDir := tb.TempDir()
@@ -966,9 +1005,9 @@ func initTestOutput(tb testing.TB) io.Writer {
var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)

func setupTestCase(t testing.TB, tc *testCase, featureGates map[featuregate.Feature]bool, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
tCtx := ktesting.Init(t, initoption.PerTestOutput(*useTestingLog))
tCtx := ktesting.Init(t, initoption.PerTestOutput(*UseTestingLog))
artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
if !*useTestingLog && doArtifacts {
if !*UseTestingLog && doArtifacts {
// Reconfigure logging so that it goes to a separate file per
// test instead of stderr. If the test passes, the file gets
// deleted. The overall output can be very large (> 200 MB for
@@ -1055,13 +1094,12 @@ func featureGatesMerge(src map[featuregate.Feature]bool, overrides map[featurega
return result
}

// RunBenchmarkPerfScheduling runs the scheduler performance tests.
// RunBenchmarkPerfScheduling runs the scheduler performance benchmark tests.
//
// You can pass your own scheduler plugins via outOfTreePluginRegistry.
// Also, you may want to put your plugins in PluginNames variable in this package
// to collect metrics for them.
// testcaseLabelSelectors is available to select specific test cases to run with labels on them.
func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkruntime.Registry, testcaseLabelSelectors []string) {
func RunBenchmarkPerfScheduling(b *testing.B, configFile string, topicName string, outOfTreePluginRegistry frameworkruntime.Registry) {
testCases, err := getTestCases(configFile)
if err != nil {
b.Fatal(err)
@@ -1070,6 +1108,11 @@ func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkr
b.Fatal(err)
}

if testing.Short() {
*PerfSchedulingLabelFilter += ",+short"
}
testcaseLabelSelectors := strings.Split(*PerfSchedulingLabelFilter, ",")

output := initTestOutput(b)

// Because we run sequentially, it is possible to change the global
@@ -1086,7 +1129,7 @@ func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkr
for _, w := range tc.Workloads {
b.Run(w.Name, func(b *testing.B) {
if !enabled(testcaseLabelSelectors, append(tc.Labels, w.Labels...)...) {
b.Skipf("disabled by label filter %v", testcaseLabelSelectors)
b.Skipf("disabled by label filter %v", PerfSchedulingLabelFilter)
}

featureGates := featureGatesMerge(tc.FeatureGates, w.FeatureGates)
@@ -1135,7 +1178,7 @@ func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkr
continue
}

destFile, err := dataFilename(strings.ReplaceAll(fmt.Sprintf("%s_%s_%s.dat", tc.Name, w.Name, runID), "/", "_"))
destFile, err := dataFilename(strings.ReplaceAll(fmt.Sprintf("%s_%s_%s_%s.dat", tc.Name, w.Name, topicName, runID), "/", "_"))
if err != nil {
b.Fatalf("prepare data file: %v", err)
}
@@ -1156,11 +1199,54 @@ func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkr
}
})
}
if err := dataItems2JSONFile(dataItems, b.Name()+"_benchmark"); err != nil {
if err := dataItems2JSONFile(dataItems, b.Name()+"_benchmark_"+topicName); err != nil {
b.Fatalf("unable to write measured data %+v: %v", dataItems, err)
}
}

// RunIntegrationPerfScheduling runs the scheduler performance integration tests.
func RunIntegrationPerfScheduling(t *testing.T, configFile string) {
testCases, err := getTestCases(configFile)
if err != nil {
t.Fatal(err)
}
if err = validateTestCases(testCases); err != nil {
t.Fatal(err)
}

if testing.Short() {
*TestSchedulingLabelFilter += ",+short"
}
testcaseLabelSelectors := strings.Split(*TestSchedulingLabelFilter, ",")

for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) {
for _, w := range tc.Workloads {
t.Run(w.Name, func(t *testing.T) {
if !enabled(testcaseLabelSelectors, append(tc.Labels, w.Labels...)...) {
t.Skipf("disabled by label filter %q", *TestSchedulingLabelFilter)
}
featureGates := featureGatesMerge(tc.FeatureGates, w.FeatureGates)
informerFactory, tCtx := setupTestCase(t, tc, featureGates, nil, nil)

runWorkload(tCtx, tc, w, informerFactory)

if featureGates[features.SchedulerQueueingHints] {
// In any case, we should make sure InFlightEvents is empty after running the scenario.
if err = checkEmptyInFlightEvents(); err != nil {
tCtx.Errorf("%s: %s", w.Name, err)
}
}

// Reset metrics to prevent metrics generated in current workload gets
// carried over to the next workload.
legacyregistry.Reset()
})
}
})
}
}

func loadSchedulerConfig(file string) (*config.KubeSchedulerConfiguration, error) {
data, err := os.ReadFile(file)
if err != nil {
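The doc comment above notes that callers can pass their own scheduler plugins via outOfTreePluginRegistry. A minimal, hypothetical sketch of an external wrapper doing so follows; the `custom` topic, the NoopPlugin stub, and its factory are illustrative assumptions (not part of this PR), the factory signature is assumed to match the in-tree framework runtime at this point in time, and a TestMain calling perf.InitTests() is assumed as in misc_test.go:

```go
package custom

import (
	"context"
	"testing"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
	perf "k8s.io/kubernetes/test/integration/scheduler_perf"
)

// noopPlugin is a stand-in for a real out-of-tree plugin.
type noopPlugin struct{}

func (p *noopPlugin) Name() string { return "NoopPlugin" }

func newNoopPlugin(_ context.Context, _ runtime.Object, _ framework.Handle) (framework.Plugin, error) {
	return &noopPlugin{}, nil
}

func BenchmarkPerfScheduling(b *testing.B) {
	// Plugins registered here become available to the scheduler configs referenced
	// by the topic's performance-config.yaml; per the doc comment above, metrics for
	// them are only collected if they are also listed in the PluginNames variable.
	registry := frameworkruntime.Registry{
		"NoopPlugin": newNoopPlugin,
	}
	perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "custom", registry)
}
```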
@@ -1,37 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// BenchmarkPerfScheduling is implemented in benchmark_test
// to ensure that scheduler_perf can be run from outside kubernetes.
package benchmark_test

import (
"flag"
"strings"
"testing"

benchmark "k8s.io/kubernetes/test/integration/scheduler_perf"
)

var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")

func BenchmarkPerfScheduling(b *testing.B) {
if testing.Short() {
*perfSchedulingLabelFilter += ",+short"
}

benchmark.RunBenchmarkPerfScheduling(b, nil, strings.Split(*perfSchedulingLabelFilter, ","))
}
@@ -1,69 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package benchmark

import (
"flag"
"strings"
"testing"

"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/kubernetes/pkg/features"
)

var testSchedulingLabelFilter = flag.String("test-scheduling-label-filter", "integration-test,-performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by TestScheduling")

func TestScheduling(t *testing.T) {
testCases, err := getTestCases(configFile)
if err != nil {
t.Fatal(err)
}
if err = validateTestCases(testCases); err != nil {
t.Fatal(err)
}

if testing.Short() {
*testSchedulingLabelFilter += ",+short"
}

for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) {
for _, w := range tc.Workloads {
t.Run(w.Name, func(t *testing.T) {
if !enabled(strings.Split(*testSchedulingLabelFilter, ","), append(tc.Labels, w.Labels...)...) {
t.Skipf("disabled by label filter %q", *testSchedulingLabelFilter)
}
featureGates := featureGatesMerge(tc.FeatureGates, w.FeatureGates)
informerFactory, tCtx := setupTestCase(t, tc, featureGates, nil, nil)

runWorkload(tCtx, tc, w, informerFactory)

if featureGates[features.SchedulerQueueingHints] {
// In any case, we should make sure InFlightEvents is empty after running the scenario.
if err = checkEmptyInFlightEvents(); err != nil {
tCtx.Errorf("%s: %s", w.Name, err)
}
}

// Reset metrics to prevent metrics generated in current workload gets
// carried over to the next workload.
legacyregistry.Reset()
})
}
})
}
}