scheduler_perf: test DRA with structured parameters

Patrick Ohly 2024-02-20 14:05:38 +01:00
parent d4d5ade7f5
commit 4ed2b3eaeb
5 changed files with 145 additions and 0 deletions

@@ -0,0 +1,9 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimParameters
metadata:
  name: test-claim-parameters
driverRequests:
- driverName: test-driver.cdi.k8s.io
  requests:
  - namedResources:
      selector: "true"

@@ -0,0 +1,11 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimTemplate
metadata:
  name: test-claim-template
spec:
  spec:
    resourceClassName: test-class
    parametersRef:
      apiGroup: resource.k8s.io
      kind: ResourceClaimParameters
      name: test-claim-parameters

@@ -0,0 +1,6 @@
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClass
metadata:
  name: test-class
driverName: test-driver.cdi.k8s.io
structuredParameters: true

@@ -853,3 +853,75 @@
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 20

# SchedulingWithResourceClaimTemplateStructured uses a ResourceClaimTemplate
# and dynamically creates ResourceClaim instances for each pod.
# The driver uses structured parameters.
- name: SchedulingWithResourceClaimTemplateStructured
  featureGates:
    DynamicResourceAllocation: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $nodesWithoutDRA
  - opcode: createNodes
    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
    countParam: $nodesWithDRA
  - opcode: createResourceDriver
    driverName: test-driver.cdi.k8s.io
    nodes: scheduler-perf-dra-*
    maxClaimsPerNodeParam: $maxClaimsPerNode
    structuredParameters: true
  - opcode: createAny
    templatePath: config/dra/resourceclass-structured.yaml
  - opcode: createAny
    templatePath: config/dra/resourceclaimparameters.yaml
    namespace: init
  - opcode: createAny
    templatePath: config/dra/resourceclaimtemplate-structured.yaml
    namespace: init
  - opcode: createPods
    namespace: init
    countParam: $initPods
    podTemplatePath: config/dra/pod-with-claim-template.yaml
  - opcode: createAny
    templatePath: config/dra/resourceclaimparameters.yaml
    namespace: test
  - opcode: createAny
    templatePath: config/dra/resourceclaimtemplate-structured.yaml
    namespace: test
  - opcode: createPods
    namespace: test
    countParam: $measurePods
    podTemplatePath: config/dra/pod-with-claim-template.yaml
    collectMetrics: true
  workloads:
  - name: fast
    labels: [integration-test, fast]
    params:
      # This testcase runs through all code paths without
      # taking too long overall.
      nodesWithDRA: 1
      nodesWithoutDRA: 1
      initPods: 0
      measurePods: 10
      maxClaimsPerNode: 10
  - name: 2000pods_100nodes
    labels: [performance, fast]
    params:
      # In this testcase, the number of nodes is smaller
      # than the limit for the PodScheduling slices.
      nodesWithDRA: 100
      nodesWithoutDRA: 0
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 20
  - name: 2000pods_200nodes
    params:
      # In this testcase, the driver and scheduler must
      # truncate the PotentialNodes and UnsuitableNodes
      # slices.
      nodesWithDRA: 200
      nodesWithoutDRA: 0
      initPods: 1000
      measurePods: 1000
      maxClaimsPerNode: 10
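
The testcase above schedules its pods from config/dra/pod-with-claim-template.yaml, which is not part of this commit. As a rough sketch (container name and image are illustrative, not copied from that file), such a pod references the claim template through the pod-level resourceClaims field, which is what causes a ResourceClaim to be generated per pod:

apiVersion: v1
kind: Pod
metadata:
  generateName: test-dra-
spec:
  containers:
  - name: pause
    image: registry.k8s.io/pause:3.9
    resources:
      claims:
      - name: resource
  resourceClaims:
  - name: resource
    source:
      resourceClaimTemplateName: test-claim-template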

@@ -126,6 +126,11 @@ type createResourceDriverOp struct {
	MaxClaimsPerNodeParam string
	// Nodes matching this glob pattern have resources managed by the driver.
	Nodes string
	// StructuredParameters is true if the controller that is built into the scheduler
	// is used and the control-plane controller is not needed.
	// Because we don't run the kubelet plugin, NodeResourceSlices must
	// get created for all nodes.
	StructuredParameters bool
}

var _ realOp = &createResourceDriverOp{}
@@ -188,6 +193,23 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
		}
	}

	if op.StructuredParameters {
		for _, nodeName := range resources.Nodes {
			slice := nodeResourceSlice(op.DriverName, nodeName, op.MaxClaimsPerNode)
			_, err := tCtx.Client().ResourceV1alpha2().NodeResourceSlices().Create(tCtx, slice, metav1.CreateOptions{})
			tCtx.ExpectNoError(err, "create node resource slice")
		}
		tCtx.CleanupCtx(func(tCtx ktesting.TContext) {
			err := tCtx.Client().ResourceV1alpha2().NodeResourceSlices().DeleteCollection(tCtx,
				metav1.DeleteOptions{},
				metav1.ListOptions{FieldSelector: "driverName=" + op.DriverName},
			)
			tCtx.ExpectNoError(err, "delete node resource slices")
		})

		// No need for the controller.
		return
	}

	controller := draapp.NewController(tCtx.Client(), resources)
	ctx, cancel := context.WithCancel(tCtx)
	var wg sync.WaitGroup
@@ -205,3 +227,28 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
		tCtx.Logf("stopped resource driver %q", op.DriverName)
	})
}

// nodeResourceSlice builds the NodeResourceSlice for one node, pre-populated
// with `capacity` named resource instances (instance-0 through instance-<capacity-1>).
func nodeResourceSlice(driverName, nodeName string, capacity int) *resourcev1alpha2.NodeResourceSlice {
	slice := &resourcev1alpha2.NodeResourceSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name: nodeName,
		},

		NodeName:   nodeName,
		DriverName: driverName,

		NodeResourceModel: resourcev1alpha2.NodeResourceModel{
			NamedResources: &resourcev1alpha2.NamedResourcesResources{},
		},
	}
	for i := 0; i < capacity; i++ {
		slice.NodeResourceModel.NamedResources.Instances = append(slice.NodeResourceModel.NamedResources.Instances,
			resourcev1alpha2.NamedResourcesInstance{
				Name: fmt.Sprintf("instance-%d", i),
			},
		)
	}
	return slice
}
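
For reference, the object that nodeResourceSlice builds for each node corresponds roughly to the manifest below. This is a sketch, not output captured from the test: it assumes maxClaimsPerNode: 2 and a node named scheduler-perf-dra-0, and the field names follow the resource.k8s.io/v1alpha2 API used elsewhere in this commit.

apiVersion: resource.k8s.io/v1alpha2
kind: NodeResourceSlice
metadata:
  name: scheduler-perf-dra-0
nodeName: scheduler-perf-dra-0
driverName: test-driver.cdi.k8s.io
namedResources:
  instances:
  - name: instance-0
  - name: instance-1

Because these slices are published up front, the scheduler can allocate claims on its own, and the test does not need to run the kubelet plugin or the control-plane controller.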