scheduler_perf: add DRA structured parameters test with shared claims

Having several pods share the same claim is not common, but it can be useful
and thus should be tested.

Before, the createPods and createAny operations could not cover this because
every generated object was identical. What is needed are different, predictable
names for the claims (from createAny) and matching references to those names in
the pods (from createPods). Now text/template processing is used to inject
these varying fields, with the index number of the pod or claim as input. A
"div" function is needed so that several different pods can reference the same
claim. The claim parameters and the simulated driver also mark their resources
as shareable, because otherwise only a single pod would be allowed to reserve
each claim.

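For illustration, here is the mechanism in miniature (a standalone sketch, not
part of the commit; the template string is made up, but the "div" helper
matches the one registered in getSpecFromTextTemplateFile below):

package main

import (
	"os"
	"text/template"
)

func main() {
	fm := template.FuncMap{"div": func(a, b int) int { return a / b }}
	tmpl := template.Must(template.New("claim").Funcs(fm).
		Parse("pod test-dra-{{.Index}} -> claim test-claim-{{div .Index 5}}\n"))
	for index := 0; index < 10; index++ {
		// Pods 0-4 resolve to test-claim-0, pods 5-9 to test-claim-1.
		if err := tmpl.Execute(os.Stdout, map[string]any{"Index": index}); err != nil {
			panic(err)
		}
	}
}
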
While at it, some existing test cases get cleaned up a bit (removal of
incorrect comments, adding comments for testing with queuing hints).
Patrick Ohly 2024-04-26 08:31:10 +02:00
parent b498eb9740
commit d88a153086
9 changed files with 236 additions and 24 deletions


@@ -0,0 +1,16 @@
apiVersion: v1
kind: Pod
metadata:
  name: test-dra-{{.Index}}
spec:
  containers:
  - image: registry.k8s.io/pause:3.9
    name: pause
    resources:
      claims:
      - name: resource
  resourceClaims:
  - name: resource
    source:
      # Five pods share access to the same claim.
      resourceClaimName: test-claim-{{div .Index 5}}


@@ -0,0 +1,10 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  name: test-claim-{{.Index}}
spec:
  resourceClassName: test-class
  parametersRef:
    apiGroup: resource.k8s.io
    kind: ResourceClaimParameters
    name: test-claim-parameters


@@ -0,0 +1,6 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  name: test-claim-{{.Index}}
spec:
  resourceClassName: test-class


@@ -2,6 +2,7 @@ apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimParameters
metadata:
  name: test-claim-parameters
shareable: true
driverRequests:
- driverName: test-driver.cdi.k8s.io
  requests:


@@ -746,6 +746,7 @@
- name: SchedulingWithResourceClaimTemplate
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
@@ -812,6 +813,7 @@
- name: SchedulingWithMultipleResourceClaims
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
@@ -887,6 +889,7 @@
- name: SchedulingWithResourceClaimTemplateStructured
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
@@ -935,8 +938,6 @@
  - name: 2000pods_100nodes
    labels: [performance, fast]
    params:
      # In this testcase, the number of nodes is smaller
      # than the limit for the PodScheduling slices.
      nodesWithDRA: 100
      nodesWithoutDRA: 0
      initPods: 1000
@@ -944,11 +945,102 @@
      maxClaimsPerNode: 20
  - name: 2000pods_200nodes
    params:
      # In this testcase, the driver and scheduler must
      # truncate the PotentialNodes and UnsuitableNodes
      # slices.
      nodesWithDRA: 200
      nodesWithoutDRA: 0
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 10
  - name: 5000pods_500nodes
    params:
      nodesWithDRA: 500
      nodesWithoutDRA: 0
      initPods: 2500
      measurePods: 2500
      maxClaimsPerNode: 10
# SchedulingWithResourceClaimStructured uses ResourceClaims
# with deterministic names that are shared between pods.
# There is a fixed ratio of 1:5 between claims and pods.
#
# The driver uses structured parameters.
- name: SchedulingWithResourceClaimStructured
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
  - opcode: createNodes
    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
    countParam: $nodesWithDRA
  - opcode: createResourceDriver
    driverName: test-driver.cdi.k8s.io
    nodes: scheduler-perf-dra-*
    maxClaimsPerNodeParam: $maxClaimsPerNode
    structuredParameters: true
  - opcode: createAny
    templatePath: config/dra/resourceclass-structured.yaml
  - opcode: createAny
    templatePath: config/dra/resourceclaimparameters.yaml
    namespace: init
  - opcode: createAny
    templatePath: config/dra/resourceclaim-structured.yaml
    namespace: init
    countParam: $initClaims
  - opcode: createPods
    namespace: init
    countParam: $initPods
    podTemplatePath: config/dra/pod-with-claim-ref.yaml
  - opcode: createAny
    templatePath: config/dra/resourceclaimparameters.yaml
    namespace: test
  - opcode: createAny
    templatePath: config/dra/resourceclaim-structured.yaml
    namespace: test
    countParam: $measureClaims
  - opcode: createPods
    namespace: test
    countParam: $measurePods
    podTemplatePath: config/dra/pod-with-claim-ref.yaml
    collectMetrics: true
  workloads:
  - name: fast
    labels: [integration-test, fast]
    params:
      # This testcase runs through all code paths without
      # taking too long overall.
      nodesWithDRA: 1
      nodesWithoutDRA: 1
      initPods: 0
      initClaims: 0
      measurePods: 10
      measureClaims: 2 # must be measurePods / 5
      maxClaimsPerNode: 2
  - name: 2000pods_100nodes
    labels: [performance, fast]
    params:
      nodesWithDRA: 100
      nodesWithoutDRA: 0
      initPods: 1000
      initClaims: 200 # must be initPods / 5
      measurePods: 1000
      measureClaims: 200 # must be measurePods / 5
      maxClaimsPerNode: 4
  - name: 2000pods_200nodes
    params:
      nodesWithDRA: 200
      nodesWithoutDRA: 0
      initPods: 1000
      initClaims: 200 # must be initPods / 5
      measurePods: 1000
      measureClaims: 200 # must be measurePods / 5
      maxClaimsPerNode: 2
  - name: 5000pods_500nodes
    params:
      nodesWithDRA: 500
      nodesWithoutDRA: 0
      initPods: 2500
      initClaims: 500 # must be initPods / 5
      measurePods: 2500
      measureClaims: 500 # must be measurePods / 5
      maxClaimsPerNode: 2


@@ -17,8 +17,11 @@ limitations under the License.
package benchmark

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"text/template"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -30,6 +33,8 @@ import (
	"k8s.io/client-go/restmapper"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/test/utils/ktesting"
	"k8s.io/utils/ptr"
	"sigs.k8s.io/yaml"
)
// createAny defines an op where some object gets created from a YAML file.
@@ -40,7 +45,13 @@ type createAny struct {
	// Namespace the object should be created in. Must be empty for cluster-scoped objects.
	Namespace string
	// Path to spec file describing the object to create.
	// This will be processed with text/template.
	// .Index will be in the range [0, Count-1] when creating
	// more than one object. .Count is the total number of objects.
	TemplatePath string
	// Count determines how many objects get created. Defaults to 1 if unset.
	Count *int
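	// CountParam may name a test parameter, e.g. "$initClaims"; if set,
	// its per-workload value overrides Count (see patchParams below).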
	CountParam string
}
var _ runnableOp = &createAny{}
@@ -61,8 +72,15 @@ func (c *createAny) collectsMetrics() bool {
	return false
}
func (c *createAny) patchParams(w *workload) (realOp, error) {
	return c, c.isValid(false)
func (c createAny) patchParams(w *workload) (realOp, error) {
	if c.CountParam != "" {
		count, err := w.Params.get(c.CountParam[1:])
		if err != nil {
			return nil, err
		}
		c.Count = ptr.To(count)
	}
	return &c, c.isValid(false)
}
func (c *createAny) requiredNamespaces() []string {
@@ -73,8 +91,18 @@ func (c *createAny) requiredNamespaces() []string {
}
func (c *createAny) run(tCtx ktesting.TContext) {
	count := 1
	if c.Count != nil {
		count = *c.Count
	}
	for index := 0; index < count; index++ {
		c.create(tCtx, map[string]any{"Index": index, "Count": count})
	}
}
func (c *createAny) create(tCtx ktesting.TContext, env map[string]any) {
	var obj *unstructured.Unstructured
	if err := getSpecFromFile(&c.TemplatePath, &obj); err != nil {
	if err := getSpecFromTextTemplateFile(c.TemplatePath, env, &obj); err != nil {
		tCtx.Fatalf("%s: parsing failed: %v", c.TemplatePath, err)
	}
@@ -143,3 +171,23 @@ func (c *createAny) run(tCtx ktesting.TContext) {
		}
	}
}
func getSpecFromTextTemplateFile(path string, env map[string]any, spec interface{}) error {
	content, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	fm := template.FuncMap{"div": func(a, b int) int {
		return a / b
	}}
	tmpl, err := template.New("object").Funcs(fm).Parse(string(content))
	if err != nil {
		return err
	}
	var buffer bytes.Buffer
	if err := tmpl.Execute(&buffer, env); err != nil {
		return err
	}
	return yaml.UnmarshalStrict(buffer.Bytes(), spec)
}
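
For illustration, this is roughly how the helper expands one of the claim
templates above (a standalone sketch with made-up values; in the real op this
happens once per index inside run):

	// Expand the claim template for object index 7 out of 10.
	var obj *unstructured.Unstructured
	err := getSpecFromTextTemplateFile("config/dra/resourceclaim-structured.yaml",
		map[string]any{"Index": 7, "Count": 10}, &obj)
	// With the template above, obj.GetName() now returns "test-claim-7".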


@@ -177,6 +177,7 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
		DriverName:     op.DriverName,
		NodeLocal:      true,
		MaxAllocations: op.MaxClaimsPerNode,
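		// Allocations must be shareable so that several pods can reserve
		// the same claim (the workloads above use a 1:5 claim:pod ratio).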
		Shareable:      true,
	}

	nodes, err := tCtx.Client().CoreV1().Nodes().List(tCtx, metav1.ListOptions{})


@@ -1432,16 +1432,12 @@ func validateTestCases(testCases []*testCase) error {
}
func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
	basePod := makeBasePod()
	podTemplate := testutils.StaticPodTemplate(makeBasePod())
	if cpo.PodTemplatePath != nil {
		var err error
		basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
		if err != nil {
			return nil, err
		}
		podTemplate = podTemplateFromFile(*cpo.PodTemplatePath)
	}
	if cpo.PersistentVolumeClaimTemplatePath == nil {
		return testutils.NewCustomCreatePodStrategy(basePod), nil
		return testutils.NewCustomCreatePodStrategy(podTemplate), nil
	}

	pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
@@ -1452,7 +1448,7 @@ func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error)
	if err != nil {
		return nil, err
	}
	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), podTemplate), nil
}
func getNodeSpecFromFile(path *string) (*v1.Node, error) {
@@ -1463,9 +1459,11 @@ func getNodeSpecFromFile(path *string) (*v1.Node, error) {
	return nodeSpec, nil
}
func getPodSpecFromFile(path *string) (*v1.Pod, error) {
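// podTemplateFromFile implements PodTemplate by running the file at the
// given path through getSpecFromTextTemplateFile for each pod index.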
type podTemplateFromFile string
func (f podTemplateFromFile) GetPodTemplate(index, count int) (*v1.Pod, error) {
	podSpec := &v1.Pod{}
	if err := getSpecFromFile(path, podSpec); err != nil {
	if err := getSpecFromTextTemplateFile(string(f), map[string]any{"Index": index, "Count": count}, podSpec); err != nil {
		return nil, fmt.Errorf("parsing Pod: %w", err)
	}
	return podSpec, nil


@@ -1235,14 +1235,22 @@ func makeCreatePod(client clientset.Interface, namespace string, podTemplate *v1
	return nil
}
func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate *v1.Pod) error {
func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate PodTemplate) error {
	var createError error
	lock := sync.Mutex{}
	createPodFunc := func(i int) {
		pod, err := podTemplate.GetPodTemplate(i, podCount)
		if err != nil {
			lock.Lock()
			defer lock.Unlock()
			createError = err
			return
		}
		pod = pod.DeepCopy()
		// client-go writes into the object that is passed to Create,
		// causing a data race unless we create a new copy for each
		// parallel call.
		if err := makeCreatePod(client, namespace, podTemplate.DeepCopy()); err != nil {
		if err := makeCreatePod(client, namespace, pod); err != nil {
			lock.Lock()
			defer lock.Unlock()
			createError = err
@@ -1257,7 +1265,7 @@ func CreatePod(ctx context.Context, client clientset.Interface, namespace string
	return createError
}
func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod, count int, bindVolume bool) error {
func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate, count int, bindVolume bool) error {
	var createError error
	lock := sync.Mutex{}
	createPodFunc := func(i int) {
@@ -1318,7 +1326,14 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
		}

		// pod
		pod := podTemplate.DeepCopy()
		pod, err := podTemplate.GetPodTemplate(i, count)
		if err != nil {
			lock.Lock()
			defer lock.Unlock()
			createError = fmt.Errorf("error getting pod template: %s", err)
			return
		}
		pod = pod.DeepCopy()
		pod.Spec.Volumes = []v1.Volume{
			{
				Name: "vol",
@@ -1345,7 +1360,7 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
	return createError
}
func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
func NewCustomCreatePodStrategy(podTemplate PodTemplate) TestPodCreateStrategy {
	return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
		return CreatePod(ctx, client, namespace, podCount, podTemplate)
	}
@@ -1354,7 +1369,32 @@ func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
// volumeFactory creates a unique PersistentVolume for a given integer.
type volumeFactory func(uniqueID int) *v1.PersistentVolume
func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod) TestPodCreateStrategy {
// PodTemplate is responsible for creating a v1.Pod instance that is ready
// to be sent to the API server.
type PodTemplate interface {
	// GetPodTemplate returns the template for one pod out of count pods.
	// It gets called once per pod, with a fixed count and with index
	// values in the range [0, count-1]. The index can, but doesn't have
	// to, be used to modify parts of the pod spec, for example a named
	// reference to some other object.
	GetPodTemplate(index, count int) (*v1.Pod, error)
}
// StaticPodTemplate returns an implementation of PodTemplate for a fixed pod that is the same regardless of the index.
func StaticPodTemplate(pod *v1.Pod) PodTemplate {
	return (*staticPodTemplate)(pod)
}

type staticPodTemplate v1.Pod

// GetPodTemplate implements [PodTemplate.GetPodTemplate] by returning the same pod
// for each call.
func (s *staticPodTemplate) GetPodTemplate(index, count int) (*v1.Pod, error) {
	return (*v1.Pod)(s), nil
}
func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate) TestPodCreateStrategy {
	return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
		return CreatePodWithPersistentVolume(ctx, client, namespace, claimTemplate, factory, podTemplate, podCount, true /* bindVolume */)
	}