diff --git a/test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml b/test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml
new file mode 100644
index 00000000000..98a941cc289
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: test-dra-{{.Index}}
+spec:
+  containers:
+  - image: registry.k8s.io/pause:3.9
+    name: pause
+    resources:
+      claims:
+      - name: resource
+  resourceClaims:
+  - name: resource
+    source:
+      # Five pods share access to the same claim.
+      resourceClaimName: test-claim-{{div .Index 5}}
diff --git a/test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml b/test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml
new file mode 100644
index 00000000000..cc591c6542f
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/resourceclaim-structured.yaml
@@ -0,0 +1,10 @@
+apiVersion: resource.k8s.io/v1alpha2
+kind: ResourceClaim
+metadata:
+  name: test-claim-{{.Index}}
+spec:
+  resourceClassName: test-class
+  parametersRef:
+    apiGroup: resource.k8s.io
+    kind: ResourceClaimParameters
+    name: test-claim-parameters
diff --git a/test/integration/scheduler_perf/config/dra/resourceclaim.yaml b/test/integration/scheduler_perf/config/dra/resourceclaim.yaml
new file mode 100644
index 00000000000..e2420f077e8
--- /dev/null
+++ b/test/integration/scheduler_perf/config/dra/resourceclaim.yaml
@@ -0,0 +1,6 @@
+apiVersion: resource.k8s.io/v1alpha2
+kind: ResourceClaim
+metadata:
+  name: test-claim-{{.Index}}
+spec:
+  resourceClassName: test-class
diff --git a/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml b/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml
index 9f3a84f9a3d..b10a1110401 100644
--- a/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml
+++ b/test/integration/scheduler_perf/config/dra/resourceclaimparameters.yaml
@@ -2,6 +2,7 @@ apiVersion: resource.k8s.io/v1alpha2
 kind: ResourceClaimParameters
 metadata:
   name: test-claim-parameters
+shareable: true
 driverRequests:
 - driverName: test-driver.cdi.k8s.io
   requests:
diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml
index 1b59952f638..727973b43ef 100644
--- a/test/integration/scheduler_perf/config/performance-config.yaml
+++ b/test/integration/scheduler_perf/config/performance-config.yaml
@@ -746,6 +746,7 @@
 - name: SchedulingWithResourceClaimTemplate
   featureGates:
     DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
   workloadTemplate:
   - opcode: createNodes
     countParam: $nodesWithoutDRA
@@ -812,6 +813,7 @@
 - name: SchedulingWithMultipleResourceClaims
   featureGates:
     DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
   workloadTemplate:
   - opcode: createNodes
     countParam: $nodesWithoutDRA
@@ -887,6 +889,7 @@
 - name: SchedulingWithResourceClaimTemplateStructured
   featureGates:
     DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
   workloadTemplate:
   - opcode: createNodes
     countParam: $nodesWithoutDRA
@@ -935,8 +938,6 @@
   - name: 2000pods_100nodes
     labels: [performance, fast]
     params:
-      # In this testcase, the number of nodes is smaller
-      # than the limit for the PodScheduling slices.
       nodesWithDRA: 100
       nodesWithoutDRA: 0
       initPods: 1000
@@ -944,11 +945,102 @@
       maxClaimsPerNode: 20
   - name: 2000pods_200nodes
     params:
-      # In this testcase, the driver and scheduler must
-      # truncate the PotentialNodes and UnsuitableNodes
-      # slices.
       nodesWithDRA: 200
       nodesWithoutDRA: 0
       initPods: 1000
       measurePods: 1000
       maxClaimsPerNode: 10
+  - name: 5000pods_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initPods: 2500
+      measurePods: 2500
+      maxClaimsPerNode: 10
+
+# SchedulingWithResourceClaimStructured uses ResourceClaims
+# with deterministic names that are shared between pods.
+# There is a fixed ratio of 1:5 between claims and pods.
+#
+# The driver uses structured parameters.
+- name: SchedulingWithResourceClaimStructured
+  featureGates:
+    DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $nodesWithoutDRA
+  - opcode: createNodes
+    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
+    countParam: $nodesWithDRA
+  - opcode: createResourceDriver
+    driverName: test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+    structuredParameters: true
+  - opcode: createAny
+    templatePath: config/dra/resourceclass-structured.yaml
+  - opcode: createAny
+    templatePath: config/dra/resourceclaimparameters.yaml
+    namespace: init
+  - opcode: createAny
+    templatePath: config/dra/resourceclaim-structured.yaml
+    namespace: init
+    countParam: $initClaims
+  - opcode: createPods
+    namespace: init
+    countParam: $initPods
+    podTemplatePath: config/dra/pod-with-claim-ref.yaml
+  - opcode: createAny
+    templatePath: config/dra/resourceclaimparameters.yaml
+    namespace: test
+  - opcode: createAny
+    templatePath: config/dra/resourceclaim-structured.yaml
+    namespace: test
+    countParam: $measureClaims
+  - opcode: createPods
+    namespace: test
+    countParam: $measurePods
+    podTemplatePath: config/dra/pod-with-claim-ref.yaml
+    collectMetrics: true
+  workloads:
+  - name: fast
+    labels: [integration-test, fast]
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initPods: 0
+      initClaims: 0
+      measurePods: 10
+      measureClaims: 2 # must be measurePods / 5
+      maxClaimsPerNode: 2
+  - name: 2000pods_100nodes
+    labels: [performance, fast]
+    params:
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initPods: 1000
+      initClaims: 200 # must be initPods / 5
+      measurePods: 1000
+      measureClaims: 200 # must be measurePods / 5
+      maxClaimsPerNode: 4
+  - name: 2000pods_200nodes
+    params:
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initPods: 1000
+      initClaims: 200 # must be initPods / 5
+      measurePods: 1000
+      measureClaims: 200 # must be measurePods / 5
+      maxClaimsPerNode: 2
+  - name: 5000pods_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initPods: 2500
+      initClaims: 500 # must be initPods / 5
+      measurePods: 2500
+      measureClaims: 500 # must be measurePods / 5
+      maxClaimsPerNode: 2
diff --git a/test/integration/scheduler_perf/create.go b/test/integration/scheduler_perf/create.go
index 16944bcc02b..4543a085732 100644
--- a/test/integration/scheduler_perf/create.go
+++ b/test/integration/scheduler_perf/create.go
@@ -17,8 +17,11 @@ limitations under the License.
 package benchmark
 
 import (
+	"bytes"
 	"context"
 	"fmt"
+	"os"
+	"text/template"
 	"time"
 
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -30,6 +33,8 @@ import (
 	"k8s.io/client-go/restmapper"
 	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/test/utils/ktesting"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/yaml"
 )
 
 // createAny defines an op where some object gets created from a YAML file.
@@ -40,7 +45,13 @@ type createAny struct {
 	// Namespace the object should be created in. Must be empty for cluster-scoped objects.
 	Namespace string
 	// Path to spec file describing the object to create.
+	// This will be processed with text/template.
+	// .Index will be in the range [0, Count-1] when creating
+	// more than one object. .Count is the total number of objects.
 	TemplatePath string
+	// Count determines how many objects get created. Defaults to 1 if unset.
+	Count      *int
+	CountParam string
 }
 
 var _ runnableOp = &createAny{}
@@ -61,8 +72,15 @@ func (c *createAny) collectsMetrics() bool {
 	return false
 }
 
-func (c *createAny) patchParams(w *workload) (realOp, error) {
-	return c, c.isValid(false)
+func (c createAny) patchParams(w *workload) (realOp, error) {
+	if c.CountParam != "" {
+		count, err := w.Params.get(c.CountParam[1:])
+		if err != nil {
+			return nil, err
+		}
+		c.Count = ptr.To(count)
+	}
+	return &c, c.isValid(false)
 }
 
 func (c *createAny) requiredNamespaces() []string {
@@ -73,8 +91,18 @@
 func (c *createAny) run(tCtx ktesting.TContext) {
+	count := 1
+	if c.Count != nil {
+		count = *c.Count
+	}
+	for index := 0; index < count; index++ {
+		c.create(tCtx, map[string]any{"Index": index, "Count": count})
+	}
+}
+
+func (c *createAny) create(tCtx ktesting.TContext, env map[string]any) {
 	var obj *unstructured.Unstructured
-	if err := getSpecFromFile(&c.TemplatePath, &obj); err != nil {
+	if err := getSpecFromTextTemplateFile(c.TemplatePath, env, &obj); err != nil {
 		tCtx.Fatalf("%s: parsing failed: %v", c.TemplatePath, err)
 	}
@@ -143,3 +171,23 @@
 		}
 	}
 }
+
+func getSpecFromTextTemplateFile(path string, env map[string]any, spec interface{}) error {
+	content, err := os.ReadFile(path)
+	if err != nil {
+		return err
+	}
+	fm := template.FuncMap{"div": func(a, b int) int {
+		return a / b
+	}}
+	tmpl, err := template.New("object").Funcs(fm).Parse(string(content))
+	if err != nil {
+		return err
+	}
+	var buffer bytes.Buffer
+	if err := tmpl.Execute(&buffer, env); err != nil {
+		return err
+	}
+
+	return yaml.UnmarshalStrict(buffer.Bytes(), spec)
+}
diff --git a/test/integration/scheduler_perf/dra.go b/test/integration/scheduler_perf/dra.go
index 85d2d60a9f6..6a2d21b9f75 100644
--- a/test/integration/scheduler_perf/dra.go
+++ b/test/integration/scheduler_perf/dra.go
@@ -177,6 +177,7 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
 		DriverName:     op.DriverName,
 		NodeLocal:      true,
 		MaxAllocations: op.MaxClaimsPerNode,
+		Shareable:      true,
 	}
 
 	nodes, err := tCtx.Client().CoreV1().Nodes().List(tCtx, metav1.ListOptions{})
diff --git a/test/integration/scheduler_perf/scheduler_perf.go b/test/integration/scheduler_perf/scheduler_perf.go
index 62aea4e74ee..ffda59d1abc 100644
--- a/test/integration/scheduler_perf/scheduler_perf.go
+++ b/test/integration/scheduler_perf/scheduler_perf.go
@@ -1432,16 +1432,12 @@ func validateTestCases(testCases []*testCase) error {
 }
 
 func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
-	basePod := makeBasePod()
+	podTemplate := testutils.StaticPodTemplate(makeBasePod())
 	if cpo.PodTemplatePath != nil {
-		var err error
-		basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
-		if err != nil {
-			return nil, err
-		}
+		podTemplate = podTemplateFromFile(*cpo.PodTemplatePath)
 	}
 	if cpo.PersistentVolumeClaimTemplatePath == nil {
-		return testutils.NewCustomCreatePodStrategy(basePod), nil
+		return testutils.NewCustomCreatePodStrategy(podTemplate), nil
 	}
 
 	pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
@@ -1452,7 +1448,7 @@ func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error)
 	if err != nil {
 		return nil, err
 	}
-	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
+	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), podTemplate), nil
 }
 
 func getNodeSpecFromFile(path *string) (*v1.Node, error) {
@@ -1463,9 +1459,11 @@ func getNodeSpecFromFile(path *string) (*v1.Node, error) {
 	return nodeSpec, nil
 }
 
-func getPodSpecFromFile(path *string) (*v1.Pod, error) {
+type podTemplateFromFile string
+
+func (f podTemplateFromFile) GetPodTemplate(index, count int) (*v1.Pod, error) {
 	podSpec := &v1.Pod{}
-	if err := getSpecFromFile(path, podSpec); err != nil {
+	if err := getSpecFromTextTemplateFile(string(f), map[string]any{"Index": index, "Count": count}, podSpec); err != nil {
 		return nil, fmt.Errorf("parsing Pod: %w", err)
 	}
 	return podSpec, nil
diff --git a/test/utils/runners.go b/test/utils/runners.go
index ad264a2b6d5..042c52ec4f4 100644
--- a/test/utils/runners.go
+++ b/test/utils/runners.go
@@ -1235,14 +1235,22 @@ func makeCreatePod(client clientset.Interface, namespace string, podTemplate *v1
 	return nil
 }
 
-func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate *v1.Pod) error {
+func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate PodTemplate) error {
 	var createError error
 	lock := sync.Mutex{}
 	createPodFunc := func(i int) {
+		pod, err := podTemplate.GetPodTemplate(i, podCount)
+		if err != nil {
+			lock.Lock()
+			defer lock.Unlock()
+			createError = err
+			return
+		}
+		pod = pod.DeepCopy()
 		// client-go writes into the object that is passed to Create,
 		// causing a data race unless we create a new copy for each
 		// parallel call.
-		if err := makeCreatePod(client, namespace, podTemplate.DeepCopy()); err != nil {
+		if err := makeCreatePod(client, namespace, pod); err != nil {
 			lock.Lock()
 			defer lock.Unlock()
 			createError = err
@@ -1257,7 +1265,7 @@
 	return createError
 }
 
-func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod, count int, bindVolume bool) error {
+func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate, count int, bindVolume bool) error {
 	var createError error
 	lock := sync.Mutex{}
 	createPodFunc := func(i int) {
@@ -1318,7 +1326,14 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
 		}
 
 		// pod
-		pod := podTemplate.DeepCopy()
+		pod, err := podTemplate.GetPodTemplate(i, count)
+		if err != nil {
+			lock.Lock()
+			defer lock.Unlock()
+			createError = fmt.Errorf("error getting pod template: %s", err)
+			return
+		}
+		pod = pod.DeepCopy()
 		pod.Spec.Volumes = []v1.Volume{
 			{
 				Name: "vol",
@@ -1345,7 +1360,7 @@
 	return createError
 }
 
-func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
+func NewCustomCreatePodStrategy(podTemplate PodTemplate) TestPodCreateStrategy {
 	return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
 		return CreatePod(ctx, client, namespace, podCount, podTemplate)
 	}
@@ -1354,7 +1369,32 @@ func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
 // volumeFactory creates an unique PersistentVolume for given integer.
 type volumeFactory func(uniqueID int) *v1.PersistentVolume
 
-func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod) TestPodCreateStrategy {
+// PodTemplate is responsible for creating a v1.Pod instance that is ready
+// to be sent to the API server.
+type PodTemplate interface {
+	// GetPodTemplate returns a pod template for one out of many different pods.
+	// Pods with numbers in the range [index, index+count-1] will be created
+	// based on what GetPodTemplate returns. It gets called multiple times
+	// with a fixed index and increasing count parameters. This number can,
+	// but doesn't have to, be used to modify parts of the pod spec, for
+	// example a named reference to some other object.
+	GetPodTemplate(index, count int) (*v1.Pod, error)
+}
+
+// StaticPodTemplate returns an implementation of PodTemplate for a fixed pod that is the same regardless of the index.
+func StaticPodTemplate(pod *v1.Pod) PodTemplate {
+	return (*staticPodTemplate)(pod)
+}
+
+type staticPodTemplate v1.Pod
+
+// GetPodTemplate implements [PodTemplate.GetPodTemplate] by returning the same pod
+// for each call.
+func (s *staticPodTemplate) GetPodTemplate(index, count int) (*v1.Pod, error) {
+	return (*v1.Pod)(s), nil
+}
+
+func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate) TestPodCreateStrategy {
 	return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
 		return CreatePodWithPersistentVolume(ctx, client, namespace, claimTemplate, factory, podTemplate, podCount, true /* bindVolume */)
 	}
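
Note on the template data: the following self-contained sketch (not part of the diff) shows how the .Index/.Count values and the div helper installed by getSpecFromTextTemplateFile render the shared claim reference in config/dra/pod-with-claim-ref.yaml. The inline template string is a shortened stand-in for the real YAML file; everything else mirrors the FuncMap and Execute call added in create.go above.

package main

import (
	"os"
	"text/template"
)

func main() {
	// Shortened stand-in for config/dra/pod-with-claim-ref.yaml.
	const podYAML = `name: test-dra-{{.Index}}
claim: test-claim-{{div .Index 5}}
---
`
	// Same helper map as getSpecFromTextTemplateFile: integer division.
	fm := template.FuncMap{"div": func(a, b int) int { return a / b }}
	tmpl := template.Must(template.New("object").Funcs(fm).Parse(podYAML))

	count := 10
	for index := 0; index < count; index++ {
		// Pods 0-4 render test-claim-0, pods 5-9 render test-claim-1:
		// the 1:5 claim:pod ratio that initClaims/measureClaims must match.
		if err := tmpl.Execute(os.Stdout, map[string]any{"Index": index, "Count": count}); err != nil {
			panic(err)
		}
	}
}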
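
Note on the PodTemplate interface: a minimal caller-side sketch (not part of the diff) of implementing the new PodTemplate interface and plugging it into NewCustomCreatePodStrategy, alongside the StaticPodTemplate and podTemplateFromFile implementations above. The type indexedPodTemplate and the function newIndexedStrategy are hypothetical names used only for this example; PodTemplate, TestPodCreateStrategy, and NewCustomCreatePodStrategy are the identifiers added or changed in test/utils/runners.go.

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	testutils "k8s.io/kubernetes/test/utils"
)

// indexedPodTemplate derives each pod from a base pod and records which
// group of five pods it belongs to in a label, similar to how the YAML
// template derives a shared claim name from the pod index.
type indexedPodTemplate struct {
	base *v1.Pod
}

// GetPodTemplate returns a copy of the base pod with an index-derived label.
func (t *indexedPodTemplate) GetPodTemplate(index, count int) (*v1.Pod, error) {
	pod := t.base.DeepCopy()
	if pod.Labels == nil {
		pod.Labels = map[string]string{}
	}
	pod.Labels["claim-group"] = fmt.Sprintf("group-%d", index/5)
	return pod, nil
}

// newIndexedStrategy plugs the template into the existing create strategy,
// the same way StaticPodTemplate and podTemplateFromFile are used in the diff.
func newIndexedStrategy(base *v1.Pod) testutils.TestPodCreateStrategy {
	return testutils.NewCustomCreatePodStrategy(&indexedPodTemplate{base: base})
}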