From 1e961af858b2d4651ff0021c1c0ea5fc9f1f8a97 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Mon, 14 Aug 2023 13:00:49 +0200
Subject: [PATCH] scheduler_perf: test case for DRA with multiple claims

The new test case covers pods with multiple claims from multiple
drivers. This leads to different behavior (the scheduler waits for
information from all drivers instead of optimistically selecting one
node right away) and to more concurrent updates of the
PodSchedulingContext objects.

The test case is currently not enabled for unit testing or integration
testing. It can be used manually with:

    -bench=BenchmarkPerfScheduling/SchedulingWithMultipleResourceClaims/2000pods_100nodes ... -perf-scheduling-label-filter=

---
 .../dra/another-resourceclaimtemplate.yaml    |  7 ++
 .../config/dra/another-resourceclass.yaml     |  5 ++
 .../dra/pod-with-many-claim-templates.yaml    | 27 +++++++
 .../config/dra/resourclass.yaml               |  5 --
 .../config/performance-config.yaml            | 76 +++++++++++++++++++
 5 files changed, 115 insertions(+), 5 deletions(-)
 create mode 100644 test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml
 create mode 100644 test/integration/scheduler_perf/config/dra/another-resourceclass.yaml
 create mode 100644 test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml
 delete mode 100644 test/integration/scheduler_perf/config/dra/resourclass.yaml

diff --git a/test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml b/test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml
new file mode 100644
index 00000000000..f68127b67a8
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/another-resourceclaimtemplate.yaml
@@ -0,0 +1,7 @@
+apiVersion: resource.k8s.io/v1alpha1
+kind: ResourceClaimTemplate
+metadata:
+  name: another-test-claim-template
+spec:
+  spec:
+    resourceClassName: another-test-class
diff --git a/test/integration/scheduler_perf/config/dra/another-resourceclass.yaml b/test/integration/scheduler_perf/config/dra/another-resourceclass.yaml
new file mode 100644
index 00000000000..52eb55698b8
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/another-resourceclass.yaml
@@ -0,0 +1,5 @@
+apiVersion: resource.k8s.io/v1alpha1
+kind: ResourceClass
+metadata:
+  name: another-test-class
+driverName: another-test-driver.cdi.k8s.io
diff --git a/test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml b/test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml
new file mode 100644
index 00000000000..2e1a3c96be2
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/pod-with-many-claim-templates.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  generateName: test-dra
+spec:
+  containers:
+  - image: registry.k8s.io/pause:3.9
+    name: pause
+    resources:
+      claims:
+      - name: resource-1a
+      - name: resource-1b
+      - name: resource-2a
+      - name: resource-2b
+  resourceClaims:
+  - name: resource-1a
+    source:
+      resourceClaimTemplateName: test-claim-template
+  - name: resource-1b
+    source:
+      resourceClaimTemplateName: test-claim-template
+  - name: resource-2a
+    source:
+      resourceClaimTemplateName: another-test-claim-template
+  - name: resource-2b
+    source:
+      resourceClaimTemplateName: another-test-claim-template
diff --git a/test/integration/scheduler_perf/config/dra/resourclass.yaml b/test/integration/scheduler_perf/config/dra/resourclass.yaml
deleted file mode 100644
index b87692e8f05..00000000000
--- a/test/integration/scheduler_perf/config/dra/resourclass.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-apiVersion: resource.k8s.io/v1alpha1
-kind: ResourceClass
-metadata:
-  name: scheduler-performance
-driverName: test-driver.cdi.k8s.io
diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml
index 6d2d0e4ac2e..8e4d735baa1 100644
--- a/test/integration/scheduler_perf/config/performance-config.yaml
+++ b/test/integration/scheduler_perf/config/performance-config.yaml
@@ -777,3 +777,79 @@
       initPods: 1000
       measurePods: 1000
       maxClaimsPerNode: 10
+
+# This is similar to SchedulingWithResourceClaimTemplate, except
+# that it uses four claims per pod, from two different drivers.
+# This puts a bit more emphasis on the complexity of collaborative
+# scheduling via PodSchedulingContext.
+- name: SchedulingWithMultipleResourceClaims
+  featureGates:
+    DynamicResourceAllocation: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $nodesWithoutDRA
+  - opcode: createNodes
+    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
+    countParam: $nodesWithDRA
+  - opcode: createResourceDriver
+    driverName: test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+  - opcode: createResourceDriver
+    driverName: another-test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+  - opcode: createResourceClass
+    templatePath: config/dra/resourceclass.yaml
+  - opcode: createResourceClass
+    templatePath: config/dra/another-resourceclass.yaml
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/resourceclaimtemplate.yaml
+    namespace: init
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/another-resourceclaimtemplate.yaml
+    namespace: init
+  - opcode: createPods
+    namespace: init
+    countParam: $initPods
+    podTemplatePath: config/dra/pod-with-many-claim-templates.yaml
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/resourceclaimtemplate.yaml
+    namespace: test
+  - opcode: createResourceClaimTemplate
+    templatePath: config/dra/another-resourceclaimtemplate.yaml
+    namespace: test
+  - opcode: createPods
+    namespace: test
+    countParam: $measurePods
+    podTemplatePath: config/dra/pod-with-many-claim-templates.yaml
+    collectMetrics: true
+  workloads:
+  - name: fast
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initPods: 0
+      measurePods: 1
+      maxClaimsPerNode: 20
+  - name: 2000pods_100nodes
+    params:
+      # In this testcase, the number of nodes is smaller
+      # than the limit for the PodSchedulingContext slices.
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initPods: 1000
+      measurePods: 1000
+      maxClaimsPerNode: 40
+  - name: 2000pods_200nodes
+    params:
+      # In this testcase, the driver and scheduler must
+      # truncate the PotentialNodes and UnsuitableNodes
+      # slices.
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initPods: 1000
+      measurePods: 1000
+      maxClaimsPerNode: 20
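
Note on the manual invocation: the flags quoted in the commit message
are arguments for the integration test binary, so they have to be
passed through whatever wrapper is used to run integration tests. A
minimal sketch of one way to do that, assuming the standard Kubernetes
test-integration Makefile target (which also takes care of bringing up
etcd); the exact set of additional flags depends on the local setup,
see the scheduler_perf README for the canonical invocation:

    make test-integration WHAT=./test/integration/scheduler_perf \
        KUBE_TEST_ARGS="-bench=BenchmarkPerfScheduling/SchedulingWithMultipleResourceClaims/2000pods_100nodes -perf-scheduling-label-filter="

Setting -perf-scheduling-label-filter to the empty string clears the
default label filter, which is what allows this not-yet-enabled test
case to be selected at all.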