scheduler_perf: test case for DRA with multiple claims

The new test case covers pods with multiple claims from multiple drivers. This
leads to different behavior (scheduler waits for information from all drivers
instead of optimistically selecting one node right away) and to more concurrent
updates of the PodSchedulingContext objects.

The test case is currently not enabled for unit testing or integration
testing. It can be used manually with:

   -bench=BenchmarkPerfScheduling/SchedulingWithMultipleResourceClaims/2000pods_100nodes
   ... -perf-scheduling-label-filter=
This commit is contained in:
Patrick Ohly 2023-08-14 13:00:49 +02:00
parent 0331e98957
commit 1e961af858
5 changed files with 115 additions and 5 deletions

View File

@ -0,0 +1,7 @@
# ResourceClaimTemplate for the second (the "another") test driver.
# Pods reference this template by name in spec.resourceClaims; the
# scheduler/controller stamps out one ResourceClaim per pod from it.
apiVersion: resource.k8s.io/v1alpha1
kind: ResourceClaimTemplate
metadata:
  name: another-test-claim-template
spec:
  # Nested spec is intentional: the outer spec is the template's spec,
  # the inner spec is the ResourceClaimSpec copied into each generated claim.
  spec:
    resourceClassName: another-test-class

View File

@ -0,0 +1,5 @@
# ResourceClass that binds claims to the second test driver
# (another-test-driver.cdi.k8s.io), so pods in the multi-claim test
# case exercise two different drivers at once.
apiVersion: resource.k8s.io/v1alpha1
kind: ResourceClass
metadata:
  name: another-test-class
# driverName is a top-level ResourceClass field (sibling of metadata),
# not part of metadata.
driverName: another-test-driver.cdi.k8s.io

View File

@ -0,0 +1,27 @@
# Pod template with four resource claims from two claim templates
# (two claims per driver). Used by the SchedulingWithMultipleResourceClaims
# scheduler_perf test case.
apiVersion: v1
kind: Pod
metadata:
  generateName: test-dra
spec:
  containers:
  - image: registry.k8s.io/pause:3.9
    name: pause
    resources:
      # The container must list each claim it uses by name; the names
      # refer to entries in spec.resourceClaims below.
      claims:
      - name: resource-1a
      - name: resource-1b
      - name: resource-2a
      - name: resource-2b
  # Four pod-level claims: 1a/1b come from the first driver's template,
  # 2a/2b from the second driver's template.
  resourceClaims:
  - name: resource-1a
    source:
      resourceClaimTemplateName: test-claim-template
  - name: resource-1b
    source:
      resourceClaimTemplateName: test-claim-template
  - name: resource-2a
    source:
      resourceClaimTemplateName: another-test-claim-template
  - name: resource-2b
    source:
      resourceClaimTemplateName: another-test-claim-template

View File

@ -1,5 +0,0 @@
# ResourceClass removed by this commit (the diff hunk is a deletion);
# shown here with its original structure restored.
apiVersion: resource.k8s.io/v1alpha1
kind: ResourceClass
metadata:
  name: scheduler-performance
# driverName is a top-level ResourceClass field, a sibling of metadata.
driverName: test-driver.cdi.k8s.io

View File

@ -777,3 +777,79 @@
initPods: 1000
measurePods: 1000
maxClaimsPerNode: 10
# This is similar to SchedulingWithResourceClaimTemplate, except
# that it uses four claims per pod, from two different drivers.
# This emphasizes a bit more the complexity of collaborative
# scheduling via PodSchedulingContext.
- name: SchedulingWithMultipleResourceClaims
  featureGates:
    DynamicResourceAllocation: true
  workloadTemplate:
  # Plain nodes plus DRA-enabled nodes; their counts are workload parameters.
  - opcode: createNodes
    countParam: $nodesWithoutDRA
  - opcode: createNodes
    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
    countParam: $nodesWithDRA
  # Two mock drivers so that each pod's claims span multiple drivers.
  - opcode: createResourceDriver
    driverName: test-driver.cdi.k8s.io
    nodes: scheduler-perf-dra-*
    maxClaimsPerNodeParam: $maxClaimsPerNode
  - opcode: createResourceDriver
    driverName: another-test-driver.cdi.k8s.io
    nodes: scheduler-perf-dra-*
    maxClaimsPerNodeParam: $maxClaimsPerNode
  - opcode: createResourceClass
    templatePath: config/dra/resourceclass.yaml
  - opcode: createResourceClass
    templatePath: config/dra/another-resourceclass.yaml
  # Claim templates must exist in every namespace where pods get created.
  - opcode: createResourceClaimTemplate
    templatePath: config/dra/resourceclaimtemplate.yaml
    namespace: init
  - opcode: createResourceClaimTemplate
    templatePath: config/dra/another-resourceclaimtemplate.yaml
    namespace: init
  - opcode: createPods
    namespace: init
    countParam: $initPods
    podTemplatePath: config/dra/pod-with-many-claim-templates.yaml
  - opcode: createResourceClaimTemplate
    templatePath: config/dra/resourceclaimtemplate.yaml
    namespace: test
  - opcode: createResourceClaimTemplate
    templatePath: config/dra/another-resourceclaimtemplate.yaml
    namespace: test
  # Only the pods in the "test" namespace are measured.
  - opcode: createPods
    namespace: test
    countParam: $measurePods
    podTemplatePath: config/dra/pod-with-many-claim-templates.yaml
    collectMetrics: true
  workloads:
  - name: fast
    params:
      # This testcase runs through all code paths without
      # taking too long overall.
      nodesWithDRA: 1
      nodesWithoutDRA: 1
      initPods: 0
      measurePods: 1
      maxClaimsPerNode: 20
  - name: 2000pods_100nodes
    params:
      # In this testcase, the number of nodes is smaller
      # than the limit for the PodScheduling slices.
      nodesWithDRA: 100
      nodesWithoutDRA: 0
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 40
  - name: 2000pods_200nodes
    params:
      # In this testcase, the driver and scheduler must
      # truncate the PotentialNodes and UnsuitableNodes
      # slices.
      nodesWithDRA: 200
      nodesWithoutDRA: 0
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 20