From d88a15308613ebbe0d604bdadac810145eb157a9 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 26 Apr 2024 08:31:10 +0200
Subject: [PATCH] scheduler_perf: add DRA structured parameters test with shared claims

Several pods sharing the same claim is not common, but can be useful
and thus should be tested.

Before, the createPods and createAny operations were not able to do
this because each generated object was the same. What we need are
different, predictable names for the claims (from createAny) and
different references to those claims in the pods (from createPods).
Now text/template processing, with the index number of the pod or
claim as input, is used to inject these varying fields. A "div"
function is needed to use the same claim in several different pods.

While at it, some existing test cases get cleaned up a bit (removing
incorrect comments, adding comments for testing with queueing hints).
---
 .../config/dra/pod-with-claim-ref.yaml        |  16 +++
 .../config/dra/resourceclaim-structured.yaml  |  10 ++
 .../config/dra/resourceclaim.yaml             |   6 ++
 .../config/dra/resourceclaimparameters.yaml   |   1 +
 .../config/performance-config.yaml            | 102 +++++++++++++++++-
 test/integration/scheduler_perf/create.go     |  54 +++++++++-
 test/integration/scheduler_perf/dra.go        |   1 +
 .../scheduler_perf/scheduler_perf.go          |  18 ++--
 test/utils/runners.go                         |  52 +++++++--
 9 files changed, 236 insertions(+), 24 deletions(-)
 create mode 100644 test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml
 create mode 100644 test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml
 create mode 100644 test/integration/scheduler_perf/config/dra/resourceclaim.yaml

diff --git a/test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml b/test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml
new file mode 100644
index 00000000000..98a941cc289
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: test-dra-{{.Index}}
+spec:
+  containers:
+  - image: registry.k8s.io/pause:3.9
+    name: pause
+    resources:
+      claims:
+      - name: resource
+  resourceClaims:
+  - name: resource
+    source:
+      # Five pods share access to the same claim.
+      resourceClaimName: test-claim-{{div .Index 5}}
diff --git a/test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml b/test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml
new file mode 100644
index 00000000000..cc591c6542f
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml
@@ -0,0 +1,10 @@
+apiVersion: resource.k8s.io/v1alpha2
+kind: ResourceClaim
+metadata:
+  name: test-claim-{{.Index}}
+spec:
+  resourceClassName: test-class
+  parametersRef:
+    apiGroup: resource.k8s.io
+    kind: ResourceClaimParameters
+    name: test-claim-parameters
diff --git a/test/integration/scheduler_perf/config/dra/resourceclaim.yaml b/test/integration/scheduler_perf/config/dra/resourceclaim.yaml
new file mode 100644
index 00000000000..e2420f077e8
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/resourceclaim.yaml
@@ -0,0 +1,6 @@
+apiVersion: resource.k8s.io/v1alpha2
+kind: ResourceClaim
+metadata:
+  name: test-claim-{{.Index}}
+spec:
+  resourceClassName: test-class
diff --git a/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml b/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml
index 9f3a84f9a3d..b10a1110401 100644
--- a/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml
+++ b/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml
@@ -2,6 +2,7 @@ apiVersion: resource.k8s.io/v1alpha2
 kind: ResourceClaimParameters
 metadata:
   name: test-claim-parameters
+shareable: true
 driverRequests:
 - driverName: test-driver.cdi.k8s.io
   requests:
diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml
index 1b59952f638..727973b43ef 100644
--- a/test/integration/scheduler_perf/config/performance-config.yaml
+++ b/test/integration/scheduler_perf/config/performance-config.yaml
@@ -746,6 +746,7 @@
 - name: SchedulingWithResourceClaimTemplate
   featureGates:
     DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
   workloadTemplate:
   - opcode: createNodes
     countParam: $nodesWithoutDRA
@@ -812,6 +813,7 @@
 - name: SchedulingWithMultipleResourceClaims
   featureGates:
     DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
   workloadTemplate:
   - opcode: createNodes
     countParam: $nodesWithoutDRA
@@ -887,6 +889,7 @@
 - name: SchedulingWithResourceClaimTemplateStructured
   featureGates:
     DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
   workloadTemplate:
   - opcode: createNodes
     countParam: $nodesWithoutDRA
@@ -935,8 +938,6 @@
   - name: 2000pods_100nodes
     labels: [performance, fast]
     params:
-      # In this testcase, the number of nodes is smaller
-      # than the limit for the PodScheduling slices.
       nodesWithDRA: 100
       nodesWithoutDRA: 0
       initPods: 1000
@@ -944,11 +945,102 @@
       maxClaimsPerNode: 20
   - name: 2000pods_200nodes
     params:
-      # In this testcase, the driver and scheduler must
-      # truncate the PotentialNodes and UnsuitableNodes
-      # slices.
       nodesWithDRA: 200
       nodesWithoutDRA: 0
       initPods: 1000
       measurePods: 1000
       maxClaimsPerNode: 10
+  - name: 5000pods_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initPods: 2500
+      measurePods: 2500
+      maxClaimsPerNode: 10
+
+# SchedulingWithResourceClaimStructured uses ResourceClaims
+# with deterministic names that are shared between pods.
+# There is a fixed ratio of 1:5 between claims and pods.
+#
+# The driver uses structured parameters.
+- name: SchedulingWithResourceClaimStructured
+  featureGates:
+    DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $nodesWithoutDRA
+  - opcode: createNodes
+    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
+    countParam: $nodesWithDRA
+  - opcode: createResourceDriver
+    driverName: test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+    structuredParameters: true
+  - opcode: createAny
+    templatePath: config/dra/resourceclass-structured.yaml
+  - opcode: createAny
+    templatePath: config/dra/resourceclaimparameters.yaml
+    namespace: init
+  - opcode: createAny
+    templatePath: config/dra/resourceclaim-structured.yaml
+    namespace: init
+    countParam: $initClaims
+  - opcode: createPods
+    namespace: init
+    countParam: $initPods
+    podTemplatePath: config/dra/pod-with-claim-ref.yaml
+  - opcode: createAny
+    templatePath: config/dra/resourceclaimparameters.yaml
+    namespace: test
+  - opcode: createAny
+    templatePath: config/dra/resourceclaim-structured.yaml
+    namespace: test
+    countParam: $measureClaims
+  - opcode: createPods
+    namespace: test
+    countParam: $measurePods
+    podTemplatePath: config/dra/pod-with-claim-ref.yaml
+    collectMetrics: true
+  workloads:
+  - name: fast
+    labels: [integration-test, fast]
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initPods: 0
+      initClaims: 0
+      measurePods: 10
+      measureClaims: 2 # must be measurePods / 5
+      maxClaimsPerNode: 2
+  - name: 2000pods_100nodes
+    labels: [performance, fast]
+    params:
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initPods: 1000
+      initClaims: 200 # must be initPods / 5
+      measurePods: 1000
+      measureClaims: 200 # must be measurePods / 5
+      maxClaimsPerNode: 4
+  - name: 2000pods_200nodes
+    params:
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initPods: 1000
+      initClaims: 200 # must be initPods / 5
+      measurePods: 1000
+      measureClaims: 200 # must be measurePods / 5
+      maxClaimsPerNode: 2
+  - name: 5000pods_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initPods: 2500
+      initClaims: 500 # must be initPods / 5
+      measurePods: 2500
+      measureClaims: 500 # must be measurePods / 5
+      maxClaimsPerNode: 2
diff --git a/test/integration/scheduler_perf/create.go b/test/integration/scheduler_perf/create.go
index 16944bcc02b..4543a085732 100644
--- a/test/integration/scheduler_perf/create.go
+++ b/test/integration/scheduler_perf/create.go
@@ -17,8 +17,11 @@ limitations under the License.
 package benchmark
 
 import (
+    "bytes"
     "context"
     "fmt"
+    "os"
+    "text/template"
     "time"
 
     apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -30,6 +33,8 @@ import (
     "k8s.io/client-go/restmapper"
     "k8s.io/klog/v2"
     "k8s.io/kubernetes/test/utils/ktesting"
+    "k8s.io/utils/ptr"
+    "sigs.k8s.io/yaml"
 )
 
 // createAny defines an op where some object gets created from a YAML file.
@@ -40,7 +45,13 @@ type createAny struct {
     // Namespace the object should be created in. Must be empty for cluster-scoped objects.
     Namespace string
     // Path to spec file describing the object to create.
+    // This will be processed with text/template.
+    // .Index will be in the range [0, Count-1] when creating
+    // more than one object. .Count is the total number of objects.
     TemplatePath string
+    // Count determines how many objects get created. Defaults to 1 if unset.
+    Count      *int
+    CountParam string
 }
 
 var _ runnableOp = &createAny{}
@@ -61,8 +72,15 @@ func (c *createAny) collectsMetrics() bool {
     return false
 }
 
-func (c *createAny) patchParams(w *workload) (realOp, error) {
-    return c, c.isValid(false)
+func (c createAny) patchParams(w *workload) (realOp, error) {
+    if c.CountParam != "" {
+        count, err := w.Params.get(c.CountParam[1:])
+        if err != nil {
+            return nil, err
+        }
+        c.Count = ptr.To(count)
+    }
+    return &c, c.isValid(false)
 }
 
 func (c *createAny) requiredNamespaces() []string {
@@ -73,8 +91,18 @@ func (c *createAny) requiredNamespaces() []string {
 }
 
 func (c *createAny) run(tCtx ktesting.TContext) {
+    count := 1
+    if c.Count != nil {
+        count = *c.Count
+    }
+    for index := 0; index < count; index++ {
+        c.create(tCtx, map[string]any{"Index": index, "Count": count})
+    }
+}
+
+func (c *createAny) create(tCtx ktesting.TContext, env map[string]any) {
     var obj *unstructured.Unstructured
-    if err := getSpecFromFile(&c.TemplatePath, &obj); err != nil {
+    if err := getSpecFromTextTemplateFile(c.TemplatePath, env, &obj); err != nil {
         tCtx.Fatalf("%s: parsing failed: %v", c.TemplatePath, err)
     }
 
@@ -143,3 +171,23 @@ func (c *createAny) run(tCtx ktesting.TContext) {
         }
     }
 }
+
+func getSpecFromTextTemplateFile(path string, env map[string]any, spec interface{}) error {
+    content, err := os.ReadFile(path)
+    if err != nil {
+        return err
+    }
+    fm := template.FuncMap{"div": func(a, b int) int {
+        return a / b
+    }}
+    tmpl, err := template.New("object").Funcs(fm).Parse(string(content))
+    if err != nil {
+        return err
+    }
+    var buffer bytes.Buffer
+    if err := tmpl.Execute(&buffer, env); err != nil {
+        return err
+    }
+
+    return yaml.UnmarshalStrict(buffer.Bytes(), spec)
+}
diff --git a/test/integration/scheduler_perf/dra.go b/test/integration/scheduler_perf/dra.go
index 85d2d60a9f6..6a2d21b9f75 100644
--- a/test/integration/scheduler_perf/dra.go
+++ b/test/integration/scheduler_perf/dra.go
@@ -177,6 +177,7 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
         DriverName:     op.DriverName,
         NodeLocal:      true,
         MaxAllocations: op.MaxClaimsPerNode,
+        Shareable:      true,
     }
 
     nodes, err := tCtx.Client().CoreV1().Nodes().List(tCtx, metav1.ListOptions{})
diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go
index 62aea4e74ee..ffda59d1abc 100644
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -1432,16 +1432,12 @@ func validateTestCases(testCases []*testCase) error {
     }
     return nil
 }
 
 func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
-    basePod := makeBasePod()
+    podTemplate := testutils.StaticPodTemplate(makeBasePod())
     if cpo.PodTemplatePath != nil {
-        var err error
-        basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
-        if err != nil {
-            return nil, err
-        }
+        podTemplate = podTemplateFromFile(*cpo.PodTemplatePath)
     }
     if cpo.PersistentVolumeClaimTemplatePath == nil {
-        return testutils.NewCustomCreatePodStrategy(basePod), nil
+        return testutils.NewCustomCreatePodStrategy(podTemplate), nil
     }
 
     pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
@@ -1452,7 +1448,7 @@ func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error)
     if err != nil {
         return nil, err
     }
-    return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
+    return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), podTemplate), nil
 }
 
 func getNodeSpecFromFile(path *string) (*v1.Node, error) {
@@ -1463,9 +1459,11 @@ func getNodeSpecFromFile(path *string) (*v1.Node, error) {
     return nodeSpec, nil
 }
 
-func getPodSpecFromFile(path *string) (*v1.Pod, error) {
+type podTemplateFromFile string
+
+func (f podTemplateFromFile) GetPodTemplate(index, count int) (*v1.Pod, error) {
     podSpec := &v1.Pod{}
-    if err := getSpecFromFile(path, podSpec); err != nil {
+    if err := getSpecFromTextTemplateFile(string(f), map[string]any{"Index": index, "Count": count}, podSpec); err != nil {
         return nil, fmt.Errorf("parsing Pod: %w", err)
     }
     return podSpec, nil
diff --git a/test/utils/runners.go b/test/utils/runners.go
index ad264a2b6d5..042c52ec4f4 100644
--- a/test/utils/runners.go
+++ b/test/utils/runners.go
@@ -1235,14 +1235,22 @@ func makeCreatePod(client clientset.Interface, namespace string, podTemplate *v1
     return nil
 }
 
-func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate *v1.Pod) error {
+func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate PodTemplate) error {
     var createError error
     lock := sync.Mutex{}
     createPodFunc := func(i int) {
+        pod, err := podTemplate.GetPodTemplate(i, podCount)
+        if err != nil {
+            lock.Lock()
+            defer lock.Unlock()
+            createError = err
+            return
+        }
+        pod = pod.DeepCopy()
         // client-go writes into the object that is passed to Create,
         // causing a data race unless we create a new copy for each
         // parallel call.
-        if err := makeCreatePod(client, namespace, podTemplate.DeepCopy()); err != nil {
+        if err := makeCreatePod(client, namespace, pod); err != nil {
             lock.Lock()
             defer lock.Unlock()
             createError = err
@@ -1257,7 +1265,7 @@ func CreatePod(ctx context.Context, client clientset.Interface, namespace string
     return createError
 }
 
-func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod, count int, bindVolume bool) error {
+func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate, count int, bindVolume bool) error {
     var createError error
     lock := sync.Mutex{}
     createPodFunc := func(i int) {
@@ -1318,7 +1326,14 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
         }
 
         // pod
-        pod := podTemplate.DeepCopy()
+        pod, err := podTemplate.GetPodTemplate(i, count)
+        if err != nil {
+            lock.Lock()
+            defer lock.Unlock()
+            createError = fmt.Errorf("error getting pod template: %s", err)
+            return
+        }
+        pod = pod.DeepCopy()
         pod.Spec.Volumes = []v1.Volume{
             {
                 Name: "vol",
@@ -1345,7 +1360,7 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
     }
     return createError
 }
 
-func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
+func NewCustomCreatePodStrategy(podTemplate PodTemplate) TestPodCreateStrategy {
     return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
         return CreatePod(ctx, client, namespace, podCount, podTemplate)
     }
@@ -1354,7 +1369,32 @@ func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
 // volumeFactory creates an unique PersistentVolume for given integer.
 type volumeFactory func(uniqueID int) *v1.PersistentVolume
 
-func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod) TestPodCreateStrategy {
+// PodTemplate is responsible for creating a v1.Pod instance that is ready
+// to be sent to the API server.
+type PodTemplate interface {
+    // GetPodTemplate returns a pod template for one out of many different pods.
+    // Pods with numbers in the range [index, index+count-1] will be created
+    // based on what GetPodTemplate returns. It gets called multiple times
+    // with a fixed index and increasing count parameters. This number can,
+    // but doesn't have to, be used to modify parts of the pod spec, for
+    // example a named reference to some other object.
+    GetPodTemplate(index, count int) (*v1.Pod, error)
+}
+
+// StaticPodTemplate returns an implementation of PodTemplate for a fixed pod that is the same regardless of the index.
+func StaticPodTemplate(pod *v1.Pod) PodTemplate {
+    return (*staticPodTemplate)(pod)
+}
+
+type staticPodTemplate v1.Pod
+
+// GetPodTemplate implements [PodTemplate.GetPodTemplate] by returning the same pod
+// for each call.
+func (s *staticPodTemplate) GetPodTemplate(index, count int) (*v1.Pod, error) {
+    return (*v1.Pod)(s), nil
+}
+
+func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate) TestPodCreateStrategy {
     return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
         return CreatePodWithPersistentVolume(ctx, client, namespace, claimTemplate, factory, podTemplate, podCount, true /* bindVolume */)
     }
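
The standalone sketch below is not part of the patch; it is only meant to illustrate how the text/template processing described in the commit message maps pod indices onto shared claim names. The template string is a trimmed-down stand-in for pod-with-claim-ref.yaml, the 1:5 ratio matches the test configuration, and everything else (package name, output format) is an assumption for demonstration purposes.

package main

import (
    "fmt"
    "os"
    "text/template"
)

func main() {
    // The same kind of "div" helper that getSpecFromTextTemplateFile registers,
    // so that {{div .Index 5}} performs integer division on the pod index.
    fm := template.FuncMap{"div": func(a, b int) int { return a / b }}

    // Trimmed-down stand-in for pod-with-claim-ref.yaml: one line per pod.
    tmpl := template.Must(template.New("object").Funcs(fm).Parse(
        "pod test-dra-{{.Index}} -> claim test-claim-{{div .Index 5}}\n"))

    // Render ten pods: pods 0-4 reference test-claim-0, pods 5-9 reference test-claim-1.
    for index := 0; index < 10; index++ {
        env := map[string]any{"Index": index, "Count": 10}
        if err := tmpl.Execute(os.Stdout, env); err != nil {
            fmt.Fprintln(os.Stderr, err)
        }
    }
}

Running it prints ten lines and makes the grouping visible that createAny (one claim per index) and createPods (five pods per claim) rely on.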