Files
kubernetes/test/integration/scheduler_perf/dra/performance-config.yaml

323 lines
10 KiB
YAML

# The following labels are used in this file. (listed in ascending order of the number of covered test cases)
#
# - integration-test: test cases to run as the integration test, usually to spot some issues in the scheduler implementation or scheduler-perf itself.
# - performance: test cases to run in the performance test.
# - short: supplemental label for the above two labels (must not used alone), which literally means short execution time test cases.
#
# Specifically, the CIs use labels like the following:
# - `ci-kubernetes-integration-master` (`integration-test`): Test cases are chosen based on a tradeoff between code coverage and overall runtime.
# It basically covers all test cases but with their smallest workload.
# - `pull-kubernetes-integration` (`integration-test`,`short`): Test cases are chosen so that they should take less than total 5 min to complete.
# - `ci-benchmark-scheduler-perf` (`performance`): Long enough test cases are chosen (ideally, longer than 10 seconds)
# to provide meaningful samples for the pod scheduling rate.
#
# Also, `performance`+`short` isn't used in the CIs, but it's used to test the performance test locally.
# (Sometimes, the test cases with `integration-test` are too small to spot issues.)
#
# Combining `performance` and `short` selects suitable workloads for a local
# before/after comparisons with benchstat.
# SchedulingWithResourceClaimTemplate uses a ResourceClaimTemplate
# and dynamically creates ResourceClaim instances for each pod. Node, pod and
# device counts are chosen so that the cluster gets filled up completely.
- name: SchedulingWithResourceClaimTemplate
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
- opcode: createNodes
nodeTemplatePath: templates/node-with-dra-test-driver.yaml
countParam: $nodesWithDRA
- opcode: createResourceDriver
driverName: test-driver.cdi.k8s.io
nodes: scheduler-perf-dra-*
maxClaimsPerNodeParam: $maxClaimsPerNode
- opcode: createAny
templatePath: templates/deviceclass.yaml
- opcode: createAny
templatePath: templates/resourceclaimtemplate.yaml
namespace: init
- opcode: createPods
namespace: init
countParam: $initPods
podTemplatePath: templates/pod-with-claim-template.yaml
- opcode: createAny
templatePath: templates/resourceclaimtemplate.yaml
namespace: test
- opcode: createPods
namespace: test
countParam: $measurePods
podTemplatePath: templates/pod-with-claim-template.yaml
collectMetrics: true
workloads:
- name: fast
featureGates:
SchedulerQueueingHints: false
labels: [integration-test, short]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initPods: 0
measurePods: 10
maxClaimsPerNode: 10
- name: fast_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
labels: [integration-test, short]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initPods: 0
measurePods: 10
maxClaimsPerNode: 10
- name: 2000pods_100nodes
params:
nodesWithDRA: 100
nodesWithoutDRA: 0
initPods: 1000
measurePods: 1000
maxClaimsPerNode: 20
- name: 2000pods_200nodes
params:
nodesWithDRA: 200
nodesWithoutDRA: 0
initPods: 1000
measurePods: 1000
maxClaimsPerNode: 10
- name: 5000pods_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initPods: 2500
measurePods: 2500
maxClaimsPerNode: 10
# SteadyStateResourceClaimTemplate uses a ResourceClaimTemplate and
# dynamically creates ResourceClaim instances for each pod. It creates ten
# pods, waits for them to be scheduled, deletes them, and starts again,
# so the cluster remains at the same level of utilization.
#
# The number of already allocated claims can be varied, thus simulating
# various degrees of pre-existing resource utilization.
- name: SteadyStateClusterResourceClaimTemplate
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
- opcode: createNodes
nodeTemplatePath: templates/node-with-dra-test-driver.yaml
countParam: $nodesWithDRA
- opcode: createResourceDriver
driverName: test-driver.cdi.k8s.io
nodes: scheduler-perf-dra-*
maxClaimsPerNodeParam: $maxClaimsPerNode
- opcode: createAny
templatePath: templates/deviceclass.yaml
- opcode: createAny
templatePath: templates/resourceclaim.yaml
countParam: $initClaims
namespace: init
- opcode: allocResourceClaims
namespace: init
- opcode: createAny
templatePath: templates/resourceclaimtemplate.yaml
namespace: test
- opcode: createPods
namespace: test
count: 10
steadyState: true
durationParam: $duration
podTemplatePath: templates/pod-with-claim-template.yaml
collectMetrics: true
workloads:
- name: fast
featureGates:
SchedulerQueueingHints: false
labels: [integration-test, short]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initClaims: 0
maxClaimsPerNode: 10
duration: 2s
- name: fast_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
labels: [integration-test, short]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initClaims: 0
maxClaimsPerNode: 10
duration: 2s
- name: empty_100nodes
params:
nodesWithDRA: 100
nodesWithoutDRA: 0
initClaims: 0
maxClaimsPerNode: 10
duration: 10s
- name: empty_200nodes
params:
nodesWithDRA: 200
nodesWithoutDRA: 0
initClaims: 0
maxClaimsPerNode: 10
duration: 10s
- name: empty_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initClaims: 0
maxClaimsPerNode: 10
duration: 10s
# In the "half" scenarios, half of the devices are in use.
- name: half_100nodes
params:
nodesWithDRA: 100
nodesWithoutDRA: 0
initClaims: 500
maxClaimsPerNode: 10
duration: 10s
- name: half_200nodes
params:
nodesWithDRA: 200
nodesWithoutDRA: 0
initClaims: 1000
maxClaimsPerNode: 10
duration: 10s
- name: half_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initClaims: 2500
maxClaimsPerNode: 10
duration: 10s
# In the "full" scenarios, the cluster can accommodate exactly 10 additional pods.
- name: full_100nodes
params:
nodesWithDRA: 100
nodesWithoutDRA: 0
initClaims: 990
maxClaimsPerNode: 10
duration: 10s
- name: full_200nodes
params:
nodesWithDRA: 200
nodesWithoutDRA: 0
initClaims: 1990
maxClaimsPerNode: 10
duration: 10s
- name: full_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initClaims: 4990
maxClaimsPerNode: 10
duration: 10s
# SchedulingWithResourceClaimTemplate uses ResourceClaims
# with deterministic names that are shared between pods.
# There is a fixed ratio of 1:5 between claims and pods.
- name: SchedulingWithResourceClaim
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
- opcode: createNodes
nodeTemplatePath: templates/node-with-dra-test-driver.yaml
countParam: $nodesWithDRA
- opcode: createResourceDriver
driverName: test-driver.cdi.k8s.io
nodes: scheduler-perf-dra-*
maxClaimsPerNodeParam: $maxClaimsPerNode
- opcode: createAny
templatePath: templates/deviceclass.yaml
- opcode: createAny
templatePath: templates/resourceclaim.yaml
namespace: init
countParam: $initClaims
- opcode: createPods
namespace: init
countParam: $initPods
podTemplatePath: templates/pod-with-claim-ref.yaml
- opcode: createAny
templatePath: templates/resourceclaim.yaml
namespace: test
countParam: $measureClaims
- opcode: createPods
namespace: test
countParam: $measurePods
podTemplatePath: templates/pod-with-claim-ref.yaml
collectMetrics: true
workloads:
- name: fast
featureGates:
SchedulerQueueingHints: false
labels: [integration-test, short]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initPods: 0
initClaims: 0
measurePods: 10
measureClaims: 2 # must be measurePods / 5
maxClaimsPerNode: 2
- name: fast_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
labels: [integration-test, short]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initPods: 0
initClaims: 0
measurePods: 10
measureClaims: 2 # must be measurePods / 5
maxClaimsPerNode: 2
- name: 2000pods_100nodes
params:
nodesWithDRA: 100
nodesWithoutDRA: 0
initPods: 1000
initClaims: 200 # must be initPods / 5
measurePods: 1000
measureClaims: 200 # must be initPods / 5
maxClaimsPerNode: 4
- name: 2000pods_200nodes
params:
nodesWithDRA: 200
nodesWithoutDRA: 0
initPods: 1000
initClaims: 200 # must be initPods / 5
measurePods: 1000
measureClaims: 200 # must be measurePods / 5
maxClaimsPerNode: 2
- name: 5000pods_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initPods: 2500
initClaims: 500 # must be initPods / 5
measurePods: 2500
measureClaims: 500 # must be measurePods / 5
maxClaimsPerNode: 2