scheduler_perf: add DRA structured parameters test with shared claims

Having several pods share the same claim is not common, but it can be useful
and thus should be tested.

Before, the createPods and createAny operations could not cover this because
every generated object was identical. What is needed are different, predictable
names for the claims (from createAny) and matching references to those names in
the pods (from createPods). Now text/template processing is used to inject
these varying fields, with the index number of the pod or claim as input. A
"div" function is needed so that several different pods can reference the same
claim. The claim parameters and the simulated driver also mark their resources
as shareable, because otherwise only a single pod would be allowed to reserve
each claim.

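For illustration, here is the mechanism in miniature (a standalone sketch, not
part of the commit; the template string is made up, but the "div" helper
matches the one registered in getSpecFromTextTemplateFile below):

package main

import (
	"os"
	"text/template"
)

func main() {
	fm := template.FuncMap{"div": func(a, b int) int { return a / b }}
	tmpl := template.Must(template.New("claim").Funcs(fm).
		Parse("pod test-dra-{{.Index}} -> claim test-claim-{{div .Index 5}}\n"))
	for index := 0; index < 10; index++ {
		// Pods 0-4 resolve to test-claim-0, pods 5-9 to test-claim-1.
		if err := tmpl.Execute(os.Stdout, map[string]any{"Index": index}); err != nil {
			panic(err)
		}
	}
}
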
While at it, some existing test cases get cleaned up a bit (removal of
incorrect comments, adding comments for testing with queuing hints).
Patrick Ohly 2024-04-26 08:31:10 +02:00
parent b498eb9740
commit d88a153086
9 changed files with 236 additions and 24 deletions


@@ -0,0 +1,16 @@
apiVersion: v1
kind: Pod
metadata:
  name: test-dra-{{.Index}}
spec:
  containers:
  - image: registry.k8s.io/pause:3.9
    name: pause
    resources:
      claims:
      - name: resource
  resourceClaims:
  - name: resource
    source:
      # Five pods share access to the same claim.
      resourceClaimName: test-claim-{{div .Index 5}}


@@ -0,0 +1,10 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  name: test-claim-{{.Index}}
spec:
  resourceClassName: test-class
  parametersRef:
    apiGroup: resource.k8s.io
    kind: ResourceClaimParameters
    name: test-claim-parameters


@@ -0,0 +1,6 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  name: test-claim-{{.Index}}
spec:
  resourceClassName: test-class


@@ -2,6 +2,7 @@ apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimParameters
metadata:
  name: test-claim-parameters
shareable: true
driverRequests:
- driverName: test-driver.cdi.k8s.io
  requests:


@@ -746,6 +746,7 @@
- name: SchedulingWithResourceClaimTemplate
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
@@ -812,6 +813,7 @@
- name: SchedulingWithMultipleResourceClaims
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
@@ -887,6 +889,7 @@
- name: SchedulingWithResourceClaimTemplateStructured
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
@@ -935,8 +938,6 @@
  - name: 2000pods_100nodes
    labels: [performance, fast]
    params:
      # In this testcase, the number of nodes is smaller
      # than the limit for the PodScheduling slices.
      nodesWithDRA: 100
      nodesWithoutDRA: 0
      initPods: 1000
@@ -944,11 +945,102 @@
      maxClaimsPerNode: 20
  - name: 2000pods_200nodes
    params:
      # In this testcase, the driver and scheduler must
      # truncate the PotentialNodes and UnsuitableNodes
      # slices.
      nodesWithDRA: 200
      nodesWithoutDRA: 0
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 10
  - name: 5000pods_500nodes
    params:
      nodesWithDRA: 500
      nodesWithoutDRA: 0
      initPods: 2500
      measurePods: 2500
      maxClaimsPerNode: 10
# SchedulingWithResourceClaimStructured uses ResourceClaims
# with deterministic names that are shared between pods.
# There is a fixed ratio of 1:5 between claims and pods.
#
# The driver uses structured parameters.
- name: SchedulingWithResourceClaimStructured
  featureGates:
    DynamicResourceAllocation: true
    # SchedulerQueueingHints: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
  - opcode: createNodes
    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
    countParam: $nodesWithDRA
  - opcode: createResourceDriver
    driverName: test-driver.cdi.k8s.io
    nodes: scheduler-perf-dra-*
    maxClaimsPerNodeParam: $maxClaimsPerNode
    structuredParameters: true
  - opcode: createAny
    templatePath: config/dra/resourceclass-structured.yaml
  - opcode: createAny
    templatePath: config/dra/resourceclaimparameters.yaml
    namespace: init
  - opcode: createAny
    templatePath: config/dra/resourceclaim-structured.yaml
    namespace: init
    countParam: $initClaims
  - opcode: createPods
    namespace: init
    countParam: $initPods
    podTemplatePath: config/dra/pod-with-claim-ref.yaml
  - opcode: createAny
    templatePath: config/dra/resourceclaimparameters.yaml
    namespace: test
  - opcode: createAny
    templatePath: config/dra/resourceclaim-structured.yaml
    namespace: test
    countParam: $measureClaims
  - opcode: createPods
    namespace: test
    countParam: $measurePods
    podTemplatePath: config/dra/pod-with-claim-ref.yaml
    collectMetrics: true
  workloads:
  - name: fast
    labels: [integration-test, fast]
    params:
      # This testcase runs through all code paths without
      # taking too long overall.
      nodesWithDRA: 1
      nodesWithoutDRA: 1
      initPods: 0
      initClaims: 0
      measurePods: 10
      measureClaims: 2 # must be measurePods / 5
      maxClaimsPerNode: 2
  - name: 2000pods_100nodes
    labels: [performance, fast]
    params:
      nodesWithDRA: 100
      nodesWithoutDRA: 0
      initPods: 1000
      initClaims: 200 # must be initPods / 5
      measurePods: 1000
      measureClaims: 200 # must be measurePods / 5
      maxClaimsPerNode: 4
  - name: 2000pods_200nodes
    params:
      nodesWithDRA: 200
      nodesWithoutDRA: 0
      initPods: 1000
      initClaims: 200 # must be initPods / 5
      measurePods: 1000
      measureClaims: 200 # must be measurePods / 5
      maxClaimsPerNode: 2
  - name: 5000pods_500nodes
    params:
      nodesWithDRA: 500
      nodesWithoutDRA: 0
      initPods: 2500
      initClaims: 500 # must be initPods / 5
      measurePods: 2500
      measureClaims: 500 # must be measurePods / 5
      maxClaimsPerNode: 2


@@ -17,8 +17,11 @@ limitations under the License.
package benchmark

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"text/template"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -30,6 +33,8 @@ import (
	"k8s.io/client-go/restmapper"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/test/utils/ktesting"
	"k8s.io/utils/ptr"
	"sigs.k8s.io/yaml"
)
// createAny defines an op where some object gets created from a YAML file.
@@ -40,7 +45,13 @@ type createAny struct {
	// Namespace the object should be created in. Must be empty for cluster-scoped objects.
	Namespace string
	// Path to spec file describing the object to create.
	// This will be processed with text/template.
	// .Index will be in the range [0, Count-1] when creating
	// more than one object. .Count is the total number of objects.
	TemplatePath string
	// Count determines how many objects get created. Defaults to 1 if unset.
	Count *int
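	// CountParam may name a test parameter, e.g. "$initClaims"; if set,
	// its per-workload value overrides Count (see patchParams below).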
	CountParam string
}
var _ runnableOp = &createAny{}
@@ -61,8 +72,15 @@ func (c *createAny) collectsMetrics() bool {
	return false
}
func (c *createAny) patchParams(w *workload) (realOp, error) {
	return c, c.isValid(false)
func (c createAny) patchParams(w *workload) (realOp, error) {
	if c.CountParam != "" {
		count, err := w.Params.get(c.CountParam[1:])
		if err != nil {
			return nil, err
		}
		c.Count = ptr.To(count)
	}
	return &c, c.isValid(false)
}
func (c *createAny) requiredNamespaces() []string {
@@ -73,8 +91,18 @@ func (c *createAny) requiredNamespaces() []string {
}
func (c *createAny) run(tCtx ktesting.TContext) {
	count := 1
	if c.Count != nil {
		count = *c.Count
	}
	for index := 0; index < count; index++ {
		c.create(tCtx, map[string]any{"Index": index, "Count": count})
	}
}
func (c *createAny) create(tCtx ktesting.TContext, env map[string]any) {
	var obj *unstructured.Unstructured
	if err := getSpecFromFile(&c.TemplatePath, &obj); err != nil {
	if err := getSpecFromTextTemplateFile(c.TemplatePath, env, &obj); err != nil {
		tCtx.Fatalf("%s: parsing failed: %v", c.TemplatePath, err)
	}
@@ -143,3 +171,23 @@ func (c *createAny) run(tCtx ktesting.TContext) {
		}
	}
}
func getSpecFromTextTemplateFile(path string, env map[string]any, spec interface{}) error {
	content, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	fm := template.FuncMap{"div": func(a, b int) int {
		return a / b
	}}
	tmpl, err := template.New("object").Funcs(fm).Parse(string(content))
	if err != nil {
		return err
	}
	var buffer bytes.Buffer
	if err := tmpl.Execute(&buffer, env); err != nil {
		return err
	}
	return yaml.UnmarshalStrict(buffer.Bytes(), spec)
}
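
For illustration, this is roughly how the helper expands one of the claim
templates above (a standalone sketch with made-up values; in the real op this
happens once per index inside run):

	// Expand the claim template for object index 7 out of 10.
	var obj *unstructured.Unstructured
	err := getSpecFromTextTemplateFile("config/dra/resourceclaim-structured.yaml",
		map[string]any{"Index": 7, "Count": 10}, &obj)
	// With the template above, obj.GetName() now returns "test-claim-7".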


@@ -177,6 +177,7 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
		DriverName:     op.DriverName,
		NodeLocal:      true,
		MaxAllocations: op.MaxClaimsPerNode,
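		// Allocations must be shareable so that several pods can reserve
		// the same claim (the workloads above use a 1:5 claim:pod ratio).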
		Shareable:      true,
	}

	nodes, err := tCtx.Client().CoreV1().Nodes().List(tCtx, metav1.ListOptions{})


@@ -1432,16 +1432,12 @@ func validateTestCases(testCases []*testCase) error {
}
func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
	basePod := makeBasePod()
	podTemplate := testutils.StaticPodTemplate(makeBasePod())
	if cpo.PodTemplatePath != nil {
		var err error
		basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
		if err != nil {
			return nil, err
		}
		podTemplate = podTemplateFromFile(*cpo.PodTemplatePath)
	}
	if cpo.PersistentVolumeClaimTemplatePath == nil {
		return testutils.NewCustomCreatePodStrategy(basePod), nil
		return testutils.NewCustomCreatePodStrategy(podTemplate), nil
	}

	pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
@@ -1452,7 +1448,7 @@ func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error)
	if err != nil {
		return nil, err
	}
	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), podTemplate), nil
}
func getNodeSpecFromFile(path *string) (*v1.Node, error) {
@@ -1463,9 +1459,11 @@ func getNodeSpecFromFile(path *string) (*v1.Node, error) {
	return nodeSpec, nil
}
func getPodSpecFromFile(path *string) (*v1.Pod, error) {
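// podTemplateFromFile implements PodTemplate by running the file at the
// given path through getSpecFromTextTemplateFile for each pod index.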
type podTemplateFromFile string
func (f podTemplateFromFile) GetPodTemplate(index, count int) (*v1.Pod, error) {
	podSpec := &v1.Pod{}
	if err := getSpecFromFile(path, podSpec); err != nil {
	if err := getSpecFromTextTemplateFile(string(f), map[string]any{"Index": index, "Count": count}, podSpec); err != nil {
		return nil, fmt.Errorf("parsing Pod: %w", err)
	}
	return podSpec, nil


@@ -1235,14 +1235,22 @@ func makeCreatePod(client clientset.Interface, namespace string, podTemplate *v1
	return nil
}
func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate *v1.Pod) error {
func CreatePod(ctx context.Context, client clientset.Interface, namespace string, podCount int, podTemplate PodTemplate) error {
	var createError error
	lock := sync.Mutex{}
	createPodFunc := func(i int) {
		pod, err := podTemplate.GetPodTemplate(i, podCount)
		if err != nil {
			lock.Lock()
			defer lock.Unlock()
			createError = err
			return
		}
		pod = pod.DeepCopy()
		// client-go writes into the object that is passed to Create,
		// causing a data race unless we create a new copy for each
		// parallel call.
		if err := makeCreatePod(client, namespace, podTemplate.DeepCopy()); err != nil {
		if err := makeCreatePod(client, namespace, pod); err != nil {
			lock.Lock()
			defer lock.Unlock()
			createError = err
@@ -1257,7 +1265,7 @@ func CreatePod(ctx context.Context, client clientset.Interface, namespace string
	return createError
}
func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod, count int, bindVolume bool) error {
func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interface, namespace string, claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate, count int, bindVolume bool) error {
	var createError error
	lock := sync.Mutex{}
	createPodFunc := func(i int) {
@@ -1318,7 +1326,14 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
		}

		// pod
		pod := podTemplate.DeepCopy()
		pod, err := podTemplate.GetPodTemplate(i, count)
		if err != nil {
			lock.Lock()
			defer lock.Unlock()
			createError = fmt.Errorf("error getting pod template: %s", err)
			return
		}
		pod = pod.DeepCopy()
		pod.Spec.Volumes = []v1.Volume{
			{
				Name: "vol",
@@ -1345,7 +1360,7 @@ func CreatePodWithPersistentVolume(ctx context.Context, client clientset.Interfa
	return createError
}
func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
func NewCustomCreatePodStrategy(podTemplate PodTemplate) TestPodCreateStrategy {
	return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
		return CreatePod(ctx, client, namespace, podCount, podTemplate)
	}
@@ -1354,7 +1369,32 @@ func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
// volumeFactory creates a unique PersistentVolume for a given integer.
type volumeFactory func(uniqueID int) *v1.PersistentVolume
func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate *v1.Pod) TestPodCreateStrategy {
// PodTemplate is responsible for creating a v1.Pod instance that is ready
// to be sent to the API server.
type PodTemplate interface {
	// GetPodTemplate returns the template for one pod out of count pods.
	// It gets called once per pod, with a fixed count and with index
	// values in the range [0, count-1]. The index can, but doesn't have
	// to, be used to modify parts of the pod spec, for example a named
	// reference to some other object.
	GetPodTemplate(index, count int) (*v1.Pod, error)
}
// StaticPodTemplate returns an implementation of PodTemplate for a fixed pod that is the same regardless of the index.
func StaticPodTemplate(pod *v1.Pod) PodTemplate {
	return (*staticPodTemplate)(pod)
}

type staticPodTemplate v1.Pod

// GetPodTemplate implements [PodTemplate.GetPodTemplate] by returning the same pod
// for each call.
func (s *staticPodTemplate) GetPodTemplate(index, count int) (*v1.Pod, error) {
	return (*v1.Pod)(s), nil
}
func NewCreatePodWithPersistentVolumeStrategy(claimTemplate *v1.PersistentVolumeClaim, factory volumeFactory, podTemplate PodTemplate) TestPodCreateStrategy {
	return func(ctx context.Context, client clientset.Interface, namespace string, podCount int) error {
		return CreatePodWithPersistentVolume(ctx, client, namespace, claimTemplate, factory, podTemplate, podCount, true /* bindVolume */)
	}