From 1e961af858b2d4651ff0021c1c0ea5fc9f1f8a97 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 14 Aug 2023 13:00:49 +0200
Subject: [PATCH] scheduler_perf: test case for DRA with multiple claims

The new test case covers pods with multiple claims from multiple
drivers. This leads to different behavior (the scheduler waits for
information from all drivers instead of optimistically selecting one
node right away) and to more concurrent updates of the
PodSchedulingContext objects.

The test case is currently not enabled for unit testing or integration
testing. It can be used manually with:

    -bench=BenchmarkPerfScheduling/SchedulingWithMultipleResourceClaims/2000pods_100nodes ... -perf-scheduling-label-filter=

---
 .../dra/another-resourceclaimtemplate.yaml    |  7 ++
 .../config/dra/another-resourceclass.yaml     |  5 ++
 .../dra/pod-with-many-claim-templates.yaml    | 27 +++++++
 .../config/dra/resourclass.yaml               |  5 --
 .../config/performance-config.yaml            | 76 +++++++++++++++++++
 5 files changed, 115 insertions(+), 5 deletions(-)
 create mode 100644 test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml
 create mode 100644 test/integration/scheduler_perf/config/dra/another-resourceclass.yaml
 create mode 100644 test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml
 delete mode 100644 test/integration/scheduler_perf/config/dra/resourclass.yaml

diff --git a/test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml b/test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml
new file mode 100644
index 00000000000..f68127b67a8
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml
@@ -0,0 +1,7 @@
+apiVersion: resource.k8s.io/v1alpha1
+kind: ResourceClaimTemplate
+metadata:
+  name: another-test-claim-template
+spec:
+  spec:
+    resourceClassName: another-test-class
diff --git a/test/integration/scheduler_perf/config/dra/another-resourceclass.yaml b/test/integration/scheduler_perf/config/dra/another-resourceclass.yaml
new file mode 100644
index 00000000000..52eb55698b8
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/another-resourceclass.yaml
@@ -0,0 +1,5 @@
+apiVersion: resource.k8s.io/v1alpha1
+kind: ResourceClass
+metadata:
+  name: another-test-class
+driverName: another-test-driver.cdi.k8s.io
diff --git a/test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml b/test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml
new file mode 100644
index 00000000000..2e1a3c96be2
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  generateName: test-dra
+spec:
+  containers:
+  - image: registry.k8s.io/pause:3.9
+    name: pause
+    resources:
+      claims:
+      - name: resource-1a
+      - name: resource-1b
+      - name: resource-2a
+      - name: resource-2b
+  resourceClaims:
+  - name: resource-1a
+    source:
+      resourceClaimTemplateName: test-claim-template
+  - name: resource-1b
+    source:
+      resourceClaimTemplateName: test-claim-template
+  - name: resource-2a
+    source:
+      resourceClaimTemplateName: another-test-claim-template
+  - name: resource-2b
+    source:
+      resourceClaimTemplateName: another-test-claim-template
diff --git a/test/integration/scheduler_perf/config/dra/resourclass.yaml b/test/integration/scheduler_perf/config/dra/resourclass.yaml
deleted file mode 100644
index b87692e8f05..00000000000
--- a/test/integration/scheduler_perf/config/dra/resourclass.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-apiVersion: resource.k8s.io/v1alpha1
-kind: ResourceClass
-metadata:
-  name: scheduler-performance
-driverName: test-driver.cdi.k8s.io
diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml
index 6d2d0e4ac2e..8e4d735baa1 100644
--- a/test/integration/scheduler_perf/config/performance-config.yaml
+++ b/test/integration/scheduler_perf/config/performance-config.yaml
@@ -777,3 +777,79 @@
       initPods: 1000
       measurePods: 1000
       maxClaimsPerNode: 10
+
+# This is similar to SchedulingWithResourceClaimTemplate, except
+# that it uses four claims per pod, from two different drivers.
+# This puts a bit more emphasis on the complexity of collaborative
+# scheduling via PodSchedulingContext.
+- name: SchedulingWithMultipleResourceClaims
+  featureGates:
+    DynamicResourceAllocation: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $nodesWithoutDRA
+  - opcode: createNodes
+    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
+    countParam: $nodesWithDRA
+  - opcode: createResourceDriver
+    driverName: test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+  - opcode: createResourceDriver
+    driverName: another-test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+  - opcode: createResourceClass
+    templatePath: config/dra/resourceclass.yaml
+  - opcode: createResourceClass
+    templatePath: config/dra/another-resourceclass.yaml
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/resourceclaimtemplate.yaml
+    namespace: init
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/another-resourceclaimtemplate.yaml
+    namespace: init
+  - opcode: createPods
+    namespace: init
+    countParam: $initPods
+    podTemplatePath: config/dra/pod-with-many-claim-templates.yaml
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/resourceclaimtemplate.yaml
+    namespace: test
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/another-resourceclaimtemplate.yaml
+    namespace: test
+  - opcode: createPods
+    namespace: test
+    countParam: $measurePods
+    podTemplatePath: config/dra/pod-with-many-claim-templates.yaml
+    collectMetrics: true
+  workloads:
+  - name: fast
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initPods: 0
+      measurePods: 1
+      maxClaimsPerNode: 20
+  - name: 2000pods_100nodes
+    params:
+      # In this testcase, the number of nodes is smaller
+      # than the limit for the PodSchedulingContext slices.
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initPods: 1000
+      measurePods: 1000
+      maxClaimsPerNode: 40
+  - name: 2000pods_200nodes
+    params:
+      # In this testcase, the driver and scheduler must
+      # truncate the PotentialNodes and UnsuitableNodes
+      # slices.
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initPods: 1000
+      measurePods: 1000
+      maxClaimsPerNode: 20
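
Note on the manual invocation: the flags quoted in the commit message
are arguments for the integration test binary, so they have to be
passed through whatever wrapper is used to run integration tests. A
minimal sketch of one way to do that, assuming the standard Kubernetes
test-integration Makefile target (which also takes care of bringing up
etcd); the exact set of additional flags depends on the local setup,
see the scheduler_perf README for the canonical invocation:

    make test-integration WHAT=./test/integration/scheduler_perf \
        KUBE_TEST_ARGS="-bench=BenchmarkPerfScheduling/SchedulingWithMultipleResourceClaims/2000pods_100nodes -perf-scheduling-label-filter="

Setting -perf-scheduling-label-filter to the empty string clears the
default label filter, which is what allows this not-yet-enabled test
case to be selected at all.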