Merge pull request #93252 from adtac/scheduler-perf
scheduler_perf: refactor to allow arbitrary workloads
Commit ff1d6e8c1d
@@ -58,7 +58,7 @@ func NewIntegrationTestNodePreparerWithNodeSpec(client clientset.Interface, coun
}

// PrepareNodes prepares countToStrategy test nodes.
func (p *IntegrationTestNodePreparer) PrepareNodes() error {
func (p *IntegrationTestNodePreparer) PrepareNodes(nextNodeIndex int) error {
    numNodes := 0
    for _, v := range p.countToStrategy {
        numNodes += v.Count
@@ -103,11 +103,9 @@ func (p *IntegrationTestNodePreparer) PrepareNodes() error {
    if err != nil {
        klog.Fatalf("Error listing nodes: %v", err)
    }
    index := 0
    sum := 0
    index := nextNodeIndex
    for _, v := range p.countToStrategy {
        sum += v.Count
        for ; index < sum; index++ {
        for i := 0; i < v.Count; i, index = i+1, index+1 {
            if err := testutils.DoPrepareNode(p.client, &nodes.Items[index], v.Strategy); err != nil {
                klog.Errorf("Aborting node preparation: %v", err)
                return err
@@ -119,14 +117,18 @@ func (p *IntegrationTestNodePreparer) PrepareNodes() error {

// CleanupNodes deletes existing test nodes.
func (p *IntegrationTestNodePreparer) CleanupNodes() error {
    // TODO(#93794): make CleanupNodes only clean up the nodes created by this
    // IntegrationTestNodePreparer to make this more intuitive.
    nodes, err := GetReadySchedulableNodes(p.client)
    if err != nil {
        klog.Fatalf("Error listing nodes: %v", err)
    }
    var errRet error
    for i := range nodes.Items {
        if err := p.client.CoreV1().Nodes().Delete(context.TODO(), nodes.Items[i].Name, metav1.DeleteOptions{}); err != nil {
            klog.Errorf("Error while deleting Node: %v", err)
            errRet = err
        }
    }
    return nil
    return errRet
}
test/integration/scheduler_perf/.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
BenchmarkPerfScheduling_*.json
@@ -48,6 +48,7 @@ go_test(
        "//staging/src/k8s.io/api/storage/v1beta1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
@@ -1,264 +1,426 @@
- template:
    desc: SchedulingBasic
    initPods:
      - podTemplatePath: config/pod-default.yaml
    podsToSchedule:
      podTemplatePath: config/pod-default.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingPodAntiAffinity
    nodes:
      uniqueNodeLabelStrategy:
        labelKey: kubernetes.io/hostname
    initPods:
      - podTemplatePath: config/pod-with-pod-anti-affinity.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-pod-anti-affinity.yaml
  params:
    - numNodes: 500
      numInitPods: [100]
      numPodsToSchedule: 400
    - numNodes: 5000
      numInitPods: [1000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingSecrets
    initPods:
      - podTemplatePath: config/pod-with-secret-volume.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-secret-volume.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingInTreePVs
    initPods:
      - persistentVolumeTemplatePath: config/pv-aws.yaml
        persistentVolumeClaimTemplatePath: config/pvc.yaml
    podsToSchedule:
      persistentVolumeTemplatePath: config/pv-aws.yaml
      persistentVolumeClaimTemplatePath: config/pvc.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingMigratedInTreePVs
    nodes:
      nodeTemplatePath: config/node-default.yaml
      nodeAllocatableStrategy:
        nodeAllocatable:
          attachable-volumes-csi-ebs.csi.aws.com: 39
        csiNodeAllocatable:
          ebs.csi.aws.com:
            count: 39
        migratedPlugins:
          - "kubernetes.io/aws-ebs"
    initPods:
      - persistentVolumeTemplatePath: config/pv-aws.yaml
        persistentVolumeClaimTemplatePath: config/pvc.yaml
    podsToSchedule:
      persistentVolumeTemplatePath: config/pv-aws.yaml
      persistentVolumeClaimTemplatePath: config/pvc.yaml
    featureGates:
      CSIMigration: true
      CSIMigrationAWS: true
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingCSIPVs
    nodes:
      nodeTemplatePath: config/node-default.yaml
      nodeAllocatableStrategy:
        nodeAllocatable:
          attachable-volumes-csi-ebs.csi.aws.com: 39
        csiNodeAllocatable:
          ebs.csi.aws.com:
            count: 39
    initPods:
      - persistentVolumeTemplatePath: config/pv-csi.yaml
        persistentVolumeClaimTemplatePath: config/pvc.yaml
    podsToSchedule:
      persistentVolumeTemplatePath: config/pv-csi.yaml
      persistentVolumeClaimTemplatePath: config/pvc.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingPodAffinity
    nodes:
      nodeTemplatePath: config/node-default.yaml
      labelNodePrepareStrategy:
        labelKey: "failure-domain.beta.kubernetes.io/zone"
        labelValues: ["zone1"]
    initPods:
      - podTemplatePath: config/pod-with-pod-affinity.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-pod-affinity.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingPreferredPodAffinity
    nodes:
      uniqueNodeLabelStrategy:
        labelKey: kubernetes.io/hostname
    initPods:
      - podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingPreferredPodAntiAffinity
    nodes:
      uniqueNodeLabelStrategy:
        labelKey: kubernetes.io/hostname
    initPods:
      - podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: SchedulingNodeAffinity
    nodes:
      nodeTemplatePath: config/node-default.yaml
      labelNodePrepareStrategy:
        labelKey: "failure-domain.beta.kubernetes.io/zone"
        labelValues: ["zone1"]
    initPods:
      - podTemplatePath: config/pod-with-node-affinity.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-node-affinity.yaml
  params:
    - numNodes: 500
      numInitPods: [500]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 1000
- template:
    desc: TopologySpreading
    nodes:
      nodeTemplatePath: config/node-default.yaml
      labelNodePrepareStrategy:
        labelKey: "topology.kubernetes.io/zone"
        labelValues: ["moon-1", "moon-2", "moon-3"]
    initPods:
      - podTemplatePath: config/pod-default.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-topology-spreading.yaml
  params:
    - numNodes: 500
      numInitPods: [1000]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 2000
- template:
    desc: PreferredTopologySpreading
    nodes:
      nodeTemplatePath: config/node-default.yaml
      labelNodePrepareStrategy:
        labelKey: "topology.kubernetes.io/zone"
        labelValues: ["moon-1", "moon-2", "moon-3"]
    initPods:
      - podTemplatePath: config/pod-default.yaml
    podsToSchedule:
      podTemplatePath: config/pod-with-preferred-topology-spreading.yaml
  params:
    - numNodes: 500
      numInitPods: [1000]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [5000]
      numPodsToSchedule: 2000
- template:
    desc: MixedSchedulingBasePod
    nodes:
      nodeTemplatePath: config/node-default.yaml
      labelNodePrepareStrategy:
        labelKey: "topology.kubernetes.io/zone"
        labelValues: ["zone1"]
    initPods:
      - podTemplatePath: config/pod-default.yaml
      - podTemplatePath: config/pod-with-pod-affinity.yaml
      - podTemplatePath: config/pod-with-pod-anti-affinity.yaml
      - podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
      - podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
    podsToSchedule:
      podTemplatePath: config/pod-default.yaml
  params:
    - numNodes: 500
      numInitPods: [200, 200, 200, 200, 200]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [2000, 2000, 2000, 2000, 2000]
      numPodsToSchedule: 1000
- template:
    desc: Preemption
    initPods:
      - podTemplatePath: config/pod-low-priority.yaml
    podsToSchedule:
      podTemplatePath: config/pod-high-priority.yaml
  params:
    - numNodes: 500
      numInitPods: [2000]
      numPodsToSchedule: 500
    - numNodes: 5000
      numInitPods: [20000]
      numPodsToSchedule: 5000
- template:
    desc: Unschedulable
    skipWaitUntilInitPodsScheduled: true
    initPods:
      - podTemplatePath: config/pod-large-cpu.yaml
    podsToSchedule:
      podTemplatePath: config/pod-default.yaml
  params:
    - numNodes: 500
      numInitPods: [200]
      numPodsToSchedule: 1000
    - numNodes: 5000
      numInitPods: [200]
      numPodsToSchedule: 5000
    - numNodes: 5000
      numInitPods: [2000]
      numPodsToSchedule: 5000
- name: SchedulingBasic
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-default.yaml
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-default.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 1000
      measurePods: 1000

- name: SchedulingPodAntiAffinity
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    uniqueNodeLabelStrategy:
      labelKey: kubernetes.io/hostname
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
    collectMetrics: true
    namespace: sched-test
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 100
      measurePods: 400
  - name: 5000Nodes
    params:
      initNodes: 500
      initPods: 100
      measurePods: 400

- name: SchedulingSecrets
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-secret-volume.yaml
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-secret-volume.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingInTreePVs
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
  - opcode: createPods
    countParam: $initPods
    persistentVolumeTemplatePath: config/pv-aws.yaml
    persistentVolumeClaimTemplatePath: config/pvc.yaml
  - opcode: createPods
    countParam: $measurePods
    persistentVolumeTemplatePath: config/pv-aws.yaml
    persistentVolumeClaimTemplatePath: config/pvc.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingMigratedInTreePVs
  featureGates:
    CSIMigration: true
    CSIMigrationAWS: true
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    nodeAllocatableStrategy:
      nodeAllocatable:
        attachable-volumes-csi-ebs.csi.aws.com: "39"
      csiNodeAllocatable:
        ebs.csi.aws.com:
          count: 39
      migratedPlugins:
      - "kubernetes.io/aws-ebs"
  - opcode: createPods
    countParam: $initPods
    persistentVolumeTemplatePath: config/pv-aws.yaml
    persistentVolumeClaimTemplatePath: config/pvc.yaml
  - opcode: createPods
    countParam: $measurePods
    persistentVolumeTemplatePath: config/pv-aws.yaml
    persistentVolumeClaimTemplatePath: config/pvc.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingCSIPVs
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    nodeAllocatableStrategy:
      nodeAllocatable:
        attachable-volumes-csi-ebs.csi.aws.com: "39"
      csiNodeAllocatable:
        ebs.csi.aws.com:
          count: 39
  - opcode: createPods
    countParam: $initPods
    persistentVolumeTemplatePath: config/pv-csi.yaml
    persistentVolumeClaimTemplatePath: config/pvc.yaml
  - opcode: createPods
    countParam: $measurePods
    persistentVolumeTemplatePath: config/pv-csi.yaml
    persistentVolumeClaimTemplatePath: config/pvc.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingPodAffinity
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    labelNodePrepareStrategy:
      labelKey: "failure-domain.beta.kubernetes.io/zone"
      labelValues: ["zone1"]
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-pod-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-pod-affinity.yaml
    namespace: sched-test
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingPreferredPodAffinity
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    uniqueNodeLabelStrategy:
      labelKey: kubernetes.io/hostname
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
    namespace: sched-test
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingPreferredPodAntiAffinity
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    uniqueNodeLabelStrategy:
      labelKey: kubernetes.io/hostname
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
    namespace: sched-test
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: SchedulingNodeAffinity
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    labelNodePrepareStrategy:
      labelKey: "failure-domain.beta.kubernetes.io/zone"
      labelValues: ["zone1"]
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-node-affinity.yaml
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-node-affinity.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 500
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 1000

- name: TopologySpreading
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    labelNodePrepareStrategy:
      labelKey: "topology.kubernetes.io/zone"
      labelValues: ["moon-1", "moon-2", "moon-3"]
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-default.yaml
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-topology-spreading.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 1000
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 2000

- name: PreferredTopologySpreading
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    labelNodePrepareStrategy:
      labelKey: "topology.kubernetes.io/zone"
      labelValues: ["moon-1", "moon-2", "moon-3"]
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-default.yaml
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-with-preferred-topology-spreading.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 1000
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 2000

- name: MixedSchedulingBasePod
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
    nodeTemplatePath: config/node-default.yaml
    labelNodePrepareStrategy:
      labelKey: "topology.kubernetes.io/zone"
      labelValues: ["zone1"]
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-default.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-pod-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
    namespace: sched-setup
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-default.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 200
      measurePods: 1000
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 2000
      measurePods: 1000

- name: Preemption
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-low-priority.yaml
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-high-priority.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes
    params:
      initNodes: 500
      initPods: 2000
      measurePods: 500
  - name: 5000Nodes
    params:
      initNodes: 5000
      initPods: 20000
      measurePods: 5000

- name: Unschedulable
  workloadTemplate:
  - opcode: createNodes
    countParam: $initNodes
  - opcode: createPods
    countParam: $initPods
    podTemplatePath: config/pod-large-cpu.yaml
    skipWaitToCompletion: true
  - opcode: createPods
    countParam: $measurePods
    podTemplatePath: config/pod-default.yaml
    collectMetrics: true
  workloads:
  - name: 500Nodes/200InitPods
    params:
      initNodes: 500
      initPods: 200
      measurePods: 1000
  - name: 5000Nodes/200InitPods
    params:
      initNodes: 5000
      initPods: 200
      measurePods: 5000
  - name: 5000Nodes/2000InitPods
    params:
      initNodes: 5000
      initPods: 2000
      measurePods: 5000
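Editor's note: the config diff above replaces template/params pairs with named test cases whose workloadTemplate is an ordered list of ops, and each workload then binds concrete values to the $-prefixed parameters. Below is a simplified, self-contained Go sketch of that substitution step; the types and the patchCount helper are illustrative only (the PR's real logic is the patchParams methods later in this diff).

package main

import (
    "fmt"
    "strings"
)

// Simplified stand-ins for the harness's createPodsOp and workload types.
type createPodsOp struct {
    Count      int
    CountParam string // e.g. "$measurePods"
}

type workload struct {
    Name   string
    Params map[string]int
}

// patchCount resolves CountParam against the workload's params, mirroring the
// idea behind patchParams in the real test harness.
func patchCount(op createPodsOp, w workload) (createPodsOp, error) {
    if op.CountParam != "" {
        v, ok := w.Params[strings.TrimPrefix(op.CountParam, "$")]
        if !ok {
            return op, fmt.Errorf("parameter %s is undefined", op.CountParam)
        }
        op.Count = v
    }
    return op, nil
}

func main() {
    template := createPodsOp{CountParam: "$measurePods"}
    for _, w := range []workload{
        {Name: "500Nodes", Params: map[string]int{"measurePods": 1000}},
        {Name: "5000Nodes", Params: map[string]int{"measurePods": 2000}},
    } {
        op, err := patchCount(template, w)
        if err != nil {
            panic(err)
        }
        fmt.Printf("%s: create %d pods\n", w.Name, op.Count)
    }
}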
@@ -448,7 +448,7 @@ func benchmarkScheduling(numExistingPods, minPods int,
        clientset,
        nodeStrategies,
        "scheduler-perf-")
    if err := nodePreparer.PrepareNodes(); err != nil {
    if err := nodePreparer.PrepareNodes(0); err != nil {
        klog.Fatalf("%v", err)
    }
    defer nodePreparer.CleanupNodes()
@@ -17,12 +17,17 @@ limitations under the License.
package benchmark

import (
    "context"
    "encoding/json"
    "fmt"
    "io/ioutil"
    "strings"
    "sync"
    "testing"
    "time"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    coreinformers "k8s.io/client-go/informers/core/v1"
    clientset "k8s.io/client-go/kubernetes"
@@ -35,7 +40,10 @@ import (
)

const (
    configFile = "config/performance-config.yaml"
    configFile        = "config/performance-config.yaml"
    createNodesOpcode = "createNodes"
    createPodsOpcode  = "createPods"
    barrierOpcode     = "barrier"
)

var (
@@ -51,87 +59,248 @@ var (
    }
)

// testCase configures a test case to run the scheduler performance test. Users should be able to
// provide this via a YAML file.
//
// It specifies nodes and pods in the cluster before running the test. It also specifies the pods to
// schedule during the test. The config can be as simple as just specify number of nodes/pods, where
// default spec will be applied. It also allows the user to specify a pod spec template for more
// complicated test cases.
//
// It also specifies the metrics to be collected after the test. If nothing is specified, default metrics
// such as scheduling throughput and latencies will be collected.
// testCase defines a set of test cases that intend to test the performance of
// similar workloads of varying sizes with shared overall settings such as
// feature gates and metrics collected.
type testCase struct {
    // description of the test case
    Desc string
    // configures nodes in the cluster
    Nodes nodeCase
    // configures pods in the cluster before running the tests
    InitPods []podCase
    // configures the test to now wait for init pods to schedule before creating
    // test pods.
    SkipWaitUntilInitPodsScheduled bool
    // pods to be scheduled during the test.
    PodsToSchedule podCase
    // optional, feature gates to set before running the test
    // Name of the testCase.
    Name string
    // Feature gates to set before running the test. Optional.
    FeatureGates map[featuregate.Feature]bool
    // optional, replaces default defaultMetricsCollectorConfig if supplied.
    // List of metrics to collect. Optional, defaults to
    // defaultMetricsCollectorConfig if unspecified.
    MetricsCollectorConfig *metricsCollectorConfig
    // Template for sequence of ops that each workload must follow. Each op will
    // be executed serially one after another. Each element of the list must be
    // createNodesOp, createPodsOp, or barrierOp.
    WorkloadTemplate []op
    // List of workloads to run under this testCase.
    Workloads []*workload
    // TODO(#93792): reduce config toil by having a default pod and node spec per
    // testCase? CreatePods and CreateNodes ops will inherit these unless
    // manually overridden.
}

type nodeCase struct {
    Num int
func (tc *testCase) collectsMetrics() bool {
    for _, op := range tc.WorkloadTemplate {
        if op.realOp.collectsMetrics() {
            return true
        }
    }
    return false
}

// workload is a subtest under a testCase that tests the scheduler performance
// for a certain ordering of ops. The set of nodes created and pods scheduled
// in a workload may be heterogenous.
type workload struct {
    // Name of the workload.
    Name string
    // Values of parameters used in the workloadTemplate.
    Params map[string]int
}

// op is a dummy struct which stores the real op in itself.
type op struct {
    realOp realOp
}

// UnmarshalJSON is a custom unmarshaler for the op struct since we don't know
// which op we're decoding at runtime.
func (op *op) UnmarshalJSON(b []byte) error {
    possibleOps := []realOp{
        &createNodesOp{},
        &createPodsOp{},
        &barrierOp{},
        // TODO(#93793): add a sleep timer op to simulate waiting?
        // TODO(#94601): add a delete nodes op to simulate scaling behaviour?
    }
    var firstError error
    for _, possibleOp := range possibleOps {
        if err := json.Unmarshal(b, possibleOp); err == nil {
            if err2 := possibleOp.isValid(true); err2 == nil {
                op.realOp = possibleOp
                return nil
            } else if firstError == nil {
                // Don't return an error yet. Even though this op is invalid, it may
                // still match other possible ops.
                firstError = err2
            }
        }
    }
    return fmt.Errorf("cannot unmarshal %s into any known op type: %w", string(b), firstError)
}

// realOp is an interface that is implemented by different structs. To evaluate
// the validity of ops at parse-time, a isValid function must be implemented.
type realOp interface {
    // isValid verifies the validity of the op args such as node/pod count. Note
    // that we don't catch undefined parameters at this stage.
    isValid(allowParameterization bool) error
    // collectsMetrics checks if the op collects metrics.
    collectsMetrics() bool
    // patchParams returns a patched realOp of the same type after substituting
    // parameterizable values with workload-specific values. One should implement
    // this method on the value receiver base type, not a pointer receiver base
    // type, even though calls will be made from with a *realOp. This is because
    // callers don't want the receiver to inadvertently modify the realOp
    // (instead, it's returned as a return value).
    patchParams(w *workload) (realOp, error)
}

func isValidParameterizable(val string) bool {
    return strings.HasPrefix(val, "$")
}

// createNodesOp defines an op where nodes are created as a part of a workload.
type createNodesOp struct {
    // Must be "createNodes".
    Opcode string
    // Number of nodes to create. Parameterizable through CountParam.
    Count int
    // Template parameter for Count.
    CountParam string
    // Path to spec file describing the nodes to create. Optional.
    NodeTemplatePath *string
    // At most one of the following strategies can be defined. If not specified, default to TrivialNodePrepareStrategy.
    // At most one of the following strategies can be defined. Optional, defaults
    // to TrivialNodePrepareStrategy if unspecified.
    NodeAllocatableStrategy *testutils.NodeAllocatableStrategy
    LabelNodePrepareStrategy *testutils.LabelNodePrepareStrategy
    UniqueNodeLabelStrategy *testutils.UniqueNodeLabelStrategy
}

type podCase struct {
    Num int
    PodTemplatePath *string
func (cno *createNodesOp) isValid(allowParameterization bool) error {
    if cno.Opcode != createNodesOpcode {
        return fmt.Errorf("invalid opcode")
    }
    ok := (cno.Count > 0 ||
        (cno.CountParam != "" && allowParameterization && isValidParameterizable(cno.CountParam)))
    if !ok {
        return fmt.Errorf("invalid Count=%d / CountParam=%q", cno.Count, cno.CountParam)
    }
    return nil
}

func (*createNodesOp) collectsMetrics() bool {
    return false
}

func (cno createNodesOp) patchParams(w *workload) (realOp, error) {
    if cno.CountParam != "" {
        var ok bool
        if cno.Count, ok = w.Params[cno.CountParam[1:]]; !ok {
            return nil, fmt.Errorf("parameter %s is undefined", cno.CountParam)
        }
    }
    return &cno, (&cno).isValid(false)
}

// createPodsOp defines an op where pods are scheduled as a part of a workload.
// The test can block on the completion of this op before moving forward or
// continue asynchronously.
type createPodsOp struct {
    // Must be "createPods".
    Opcode string
    // Number of pods to schedule. Parameterizable through CountParam.
    Count int
    // Template parameter for Count.
    CountParam string
    // Whether or not to enable metrics collection for this createPodsOp.
    // Optional. Both CollectMetrics and SkipWaitToCompletion cannot be true at
    // the same time for a particular createPodsOp.
    CollectMetrics bool
    // Namespace the pods should be created in. Optional, defaults to a unique
    // namespace of the format "namespace-<number>".
    Namespace *string
    // Path to spec file describing the pods to schedule. Optional.
    PodTemplatePath *string
    // Whether or not to wait for all pods in this op to get scheduled. Optional,
    // defaults to false.
    SkipWaitToCompletion bool
    // Persistent volume settings for the pods to be scheduled. Optional.
    PersistentVolumeTemplatePath *string
    PersistentVolumeClaimTemplatePath *string
}

// simpleTestCases defines a set of test cases that share the same template (node spec, pod spec, etc)
// with testParams(e.g., NumNodes) being overridden. This provides a convenient way to define multiple tests
// with various sizes.
type simpleTestCases struct {
    Template testCase
    Params []testParams
func (cpo *createPodsOp) isValid(allowParameterization bool) error {
    if cpo.Opcode != createPodsOpcode {
        return fmt.Errorf("invalid opcode")
    }
    ok := (cpo.Count > 0 ||
        (cpo.CountParam != "" && allowParameterization && isValidParameterizable(cpo.CountParam)))
    if !ok {
        return fmt.Errorf("invalid Count=%d / CountParam=%q", cpo.Count, cpo.CountParam)
    }
    if cpo.CollectMetrics && cpo.SkipWaitToCompletion {
        // While it's technically possible to achieve this, the additional
        // complexity is not worth it, especially given that we don't have any
        // use-cases right now.
        return fmt.Errorf("collectMetrics and skipWaitToCompletion cannot be true at the same time")
    }
    return nil
}

type testParams struct {
    NumNodes int
    NumInitPods []int
    NumPodsToSchedule int
func (cpo *createPodsOp) collectsMetrics() bool {
    return cpo.CollectMetrics
}

type testDataCollector interface {
    run(stopCh chan struct{})
    collect() []DataItem
func (cpo createPodsOp) patchParams(w *workload) (realOp, error) {
    if cpo.CountParam != "" {
        var ok bool
        if cpo.Count, ok = w.Params[cpo.CountParam[1:]]; !ok {
            return nil, fmt.Errorf("parameter %s is undefined", cpo.CountParam)
        }
    }
    return &cpo, (&cpo).isValid(false)
}

// barrierOp defines an op that can be used to wait until all scheduled pods of
// one or many namespaces have been bound to nodes. This is useful when pods
// were scheduled with SkipWaitToCompletion set to true. A barrierOp is added
// at the end of each each workload automatically.
type barrierOp struct {
    // Must be "barrier".
    Opcode string
    // Namespaces to block on. Empty array or not specifying this field signifies
    // that the barrier should block on all namespaces.
    Namespaces []string
}

func (bo *barrierOp) isValid(allowParameterization bool) error {
    if bo.Opcode != barrierOpcode {
        return fmt.Errorf("invalid opcode")
    }
    return nil
}

func (*barrierOp) collectsMetrics() bool {
    return false
}

func (bo barrierOp) patchParams(w *workload) (realOp, error) {
    return &bo, nil
}

func BenchmarkPerfScheduling(b *testing.B) {
    dataItems := DataItems{Version: "v1"}
    tests, err := parseTestCases(configFile)
    testCases, err := getTestCases(configFile)
    if err != nil {
        b.Fatal(err)
    }
    if err = validateTestCases(testCases); err != nil {
        b.Fatal(err)
    }

    for _, test := range tests {
        initPods := 0
        for _, p := range test.InitPods {
            initPods += p.Num
        }
        name := fmt.Sprintf("%v/%vNodes/%vInitPods/%vPodsToSchedule", test.Desc, test.Nodes.Num, initPods, test.PodsToSchedule.Num)
        b.Run(name, func(b *testing.B) {
            for feature, flag := range test.FeatureGates {
                defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
    dataItems := DataItems{Version: "v1"}
    for _, tc := range testCases {
        b.Run(tc.Name, func(b *testing.B) {
            for _, w := range tc.Workloads {
                b.Run(w.Name, func(b *testing.B) {
                    for feature, flag := range tc.FeatureGates {
                        defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
                    }
                    dataItems.DataItems = append(dataItems.DataItems, runWorkload(b, tc, w)...)
                })
            }
            dataItems.DataItems = append(dataItems.DataItems, perfScheduling(test, b)...)
        })
    }
    if err := dataItems2JSONFile(dataItems, b.Name()); err != nil {
@@ -139,202 +308,219 @@ func BenchmarkPerfScheduling(b *testing.B) {
    }
}

func perfScheduling(test testCase, b *testing.B) []DataItem {
func runWorkload(b *testing.B, tc *testCase, w *workload) []DataItem {
    // 30 minutes should be plenty enough even for the 5000-node tests.
    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
    defer cancel()
    finalFunc, podInformer, clientset := mustSetupScheduler()
    defer finalFunc()

    nodePreparer, err := getNodePreparer(test.Nodes, clientset)
    if err != nil {
        b.Fatal(err)
    }
    if err := nodePreparer.PrepareNodes(); err != nil {
        b.Fatal(err)
    }
    defer nodePreparer.CleanupNodes()

    total := 0
    for _, p := range test.InitPods {
        if err := createPods(setupNamespace, p, clientset); err != nil {
            b.Fatal(err)
        }
        total += p.Num
    }
    if !test.SkipWaitUntilInitPodsScheduled {
        if err := waitNumPodsScheduled(b, total, podInformer, setupNamespace); err != nil {
            b.Fatal(err)
        }
    }

    // start benchmark
    b.ResetTimer()

    // Start test data collectors.
    stopCh := make(chan struct{})
    collectors := getTestDataCollectors(test, podInformer, b)
    for _, collector := range collectors {
        go collector.run(stopCh)
    }

    // Schedule the main workload
    if err := createPods(testNamespace, test.PodsToSchedule, clientset); err != nil {
        b.Fatal(err)
    }
    if err := waitNumPodsScheduled(b, test.PodsToSchedule.Num, podInformer, testNamespace); err != nil {
        b.Fatal(err)
    }

    close(stopCh)
    // Note: without this line we're taking the overhead of defer() into account.
    b.StopTimer()
    b.Cleanup(finalFunc)

    var mu sync.Mutex
    var dataItems []DataItem
    for _, collector := range collectors {
        dataItems = append(dataItems, collector.collect()...)
    numPodsScheduledPerNamespace := make(map[string]int)
    nextNodeIndex := 0

    for opIndex, op := range tc.WorkloadTemplate {
        realOp, err := op.realOp.patchParams(w)
        if err != nil {
            b.Fatalf("op %d: %v", opIndex, err)
        }
        select {
        case <-ctx.Done():
            b.Fatalf("op %d: %v", opIndex, ctx.Err())
        default:
        }
        switch concreteOp := realOp.(type) {
        case *createNodesOp:
            nodePreparer, err := getNodePreparer(fmt.Sprintf("node-%d-", opIndex), concreteOp, clientset)
            if err != nil {
                b.Fatalf("op %d: %v", opIndex, err)
            }
            if err := nodePreparer.PrepareNodes(nextNodeIndex); err != nil {
                b.Fatalf("op %d: %v", opIndex, err)
            }
            b.Cleanup(func() {
                nodePreparer.CleanupNodes()
            })
            nextNodeIndex += concreteOp.Count

        case *createPodsOp:
            var namespace string
            if concreteOp.Namespace != nil {
                namespace = *concreteOp.Namespace
            } else {
                namespace = fmt.Sprintf("namespace-%d", opIndex)
            }
            var collectors []testDataCollector
            var collectorCtx context.Context
            var collectorCancel func()
            if concreteOp.CollectMetrics {
                collectorCtx, collectorCancel = context.WithCancel(ctx)
                defer collectorCancel()
                collectors = getTestDataCollectors(podInformer, fmt.Sprintf("%s/%s", b.Name(), namespace), namespace, tc.MetricsCollectorConfig)
                for _, collector := range collectors {
                    go collector.run(collectorCtx)
                }
            }
            if err := createPods(namespace, concreteOp, clientset); err != nil {
                b.Fatalf("op %d: %v", opIndex, err)
            }
            if concreteOp.SkipWaitToCompletion {
                // Only record those namespaces that may potentially require barriers
                // in the future.
                if _, ok := numPodsScheduledPerNamespace[namespace]; ok {
                    numPodsScheduledPerNamespace[namespace] += concreteOp.Count
                } else {
                    numPodsScheduledPerNamespace[namespace] = concreteOp.Count
                }
            } else {
                if err := waitUntilPodsScheduledInNamespace(ctx, podInformer, b.Name(), namespace, concreteOp.Count); err != nil {
                    b.Fatalf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err)
                }
            }
            if concreteOp.CollectMetrics {
                // CollectMetrics and SkipWaitToCompletion can never be true at the
                // same time, so if we're here, it means that all pods have been
                // scheduled.
                collectorCancel()
                mu.Lock()
                for _, collector := range collectors {
                    dataItems = append(dataItems, collector.collect()...)
                }
                mu.Unlock()
            }

        case *barrierOp:
            for _, namespace := range concreteOp.Namespaces {
                if _, ok := numPodsScheduledPerNamespace[namespace]; !ok {
                    b.Fatalf("op %d: unknown namespace %s", opIndex, namespace)
                }
            }
            if err := waitUntilPodsScheduled(ctx, podInformer, b.Name(), concreteOp.Namespaces, numPodsScheduledPerNamespace); err != nil {
                b.Fatalf("op %d: %v", opIndex, err)
            }
            // At the end of the barrier, we can be sure that there are no pods
            // pending scheduling in the namespaces that we just blocked on.
            if len(concreteOp.Namespaces) == 0 {
                numPodsScheduledPerNamespace = make(map[string]int)
            } else {
                for _, namespace := range concreteOp.Namespaces {
                    delete(numPodsScheduledPerNamespace, namespace)
                }
            }

        default:
            b.Fatalf("op %d: invalid op %v", opIndex, concreteOp)
        }
    }
    if err := waitUntilPodsScheduled(ctx, podInformer, b.Name(), nil, numPodsScheduledPerNamespace); err != nil {
        // Any pending pods must be scheduled before this test can be considered to
        // be complete.
        b.Fatal(err)
    }
    return dataItems
}

func waitNumPodsScheduled(b *testing.B, num int, podInformer coreinformers.PodInformer, namespace string) error {
    for {
        scheduled, err := getScheduledPods(podInformer, namespace)
        if err != nil {
            return err
        }
        if len(scheduled) >= num {
            break
        }
        klog.Infof("%s: got %d existing pods, required: %d", b.Name(), len(scheduled), num)
        time.Sleep(1 * time.Second)
    }
    return nil
type testDataCollector interface {
    run(ctx context.Context)
    collect() []DataItem
}

func getTestDataCollectors(tc testCase, podInformer coreinformers.PodInformer, b *testing.B) []testDataCollector {
    collectors := []testDataCollector{newThroughputCollector(podInformer, map[string]string{"Name": b.Name()}, []string{testNamespace})}
    metricsCollectorConfig := defaultMetricsCollectorConfig
    if tc.MetricsCollectorConfig != nil {
        metricsCollectorConfig = *tc.MetricsCollectorConfig
func getTestDataCollectors(podInformer coreinformers.PodInformer, name, namespace string, mcc *metricsCollectorConfig) []testDataCollector {
    if mcc == nil {
        mcc = &defaultMetricsCollectorConfig
    }
    return []testDataCollector{
        newThroughputCollector(podInformer, map[string]string{"Name": name}, []string{namespace}),
        newMetricsCollector(mcc, map[string]string{"Name": name}),
    }
    collectors = append(collectors, newMetricsCollector(metricsCollectorConfig, map[string]string{"Name": b.Name()}))
    return collectors
}

func getNodePreparer(nc nodeCase, clientset clientset.Interface) (testutils.TestNodePreparer, error) {
func getNodePreparer(prefix string, cno *createNodesOp, clientset clientset.Interface) (testutils.TestNodePreparer, error) {
    var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
    if nc.NodeAllocatableStrategy != nil {
        nodeStrategy = nc.NodeAllocatableStrategy
    } else if nc.LabelNodePrepareStrategy != nil {
        nodeStrategy = nc.LabelNodePrepareStrategy
    } else if nc.UniqueNodeLabelStrategy != nil {
        nodeStrategy = nc.UniqueNodeLabelStrategy
    if cno.NodeAllocatableStrategy != nil {
        nodeStrategy = cno.NodeAllocatableStrategy
    } else if cno.LabelNodePrepareStrategy != nil {
        nodeStrategy = cno.LabelNodePrepareStrategy
    } else if cno.UniqueNodeLabelStrategy != nil {
        nodeStrategy = cno.UniqueNodeLabelStrategy
    }

    if nc.NodeTemplatePath != nil {
        node, err := getNodeSpecFromFile(nc.NodeTemplatePath)
    if cno.NodeTemplatePath != nil {
        node, err := getNodeSpecFromFile(cno.NodeTemplatePath)
        if err != nil {
            return nil, err
        }
        return framework.NewIntegrationTestNodePreparerWithNodeSpec(
            clientset,
            []testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
            []testutils.CountToStrategy{{Count: cno.Count, Strategy: nodeStrategy}},
            node,
        ), nil
    }
    return framework.NewIntegrationTestNodePreparer(
        clientset,
        []testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
        "scheduler-perf-",
        []testutils.CountToStrategy{{Count: cno.Count, Strategy: nodeStrategy}},
        prefix,
    ), nil
}

func createPods(ns string, pc podCase, clientset clientset.Interface) error {
    strategy, err := getPodStrategy(pc)
func createPods(namespace string, cpo *createPodsOp, clientset clientset.Interface) error {
    strategy, err := getPodStrategy(cpo)
    if err != nil {
        return err
    }
    config := testutils.NewTestPodCreatorConfig()
    config.AddStrategy(ns, pc.Num, strategy)
    config.AddStrategy(namespace, cpo.Count, strategy)
    podCreator := testutils.NewTestPodCreator(clientset, config)
    return podCreator.CreatePods()
}

func getPodStrategy(pc podCase) (testutils.TestPodCreateStrategy, error) {
    basePod := makeBasePod()
    if pc.PodTemplatePath != nil {
        var err error
        basePod, err = getPodSpecFromFile(pc.PodTemplatePath)
// waitUntilPodsScheduledInNamespace blocks until all pods in the given
// namespace are scheduled. Times out after 10 minutes because even at the
// lowest observed QPS of ~10 pods/sec, a 5000-node test should complete.
func waitUntilPodsScheduledInNamespace(ctx context.Context, podInformer coreinformers.PodInformer, name string, namespace string, wantCount int) error {
    return wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) {
        select {
        case <-ctx.Done():
            return true, ctx.Err()
        default:
        }
        scheduled, err := getScheduledPods(podInformer, namespace)
        if err != nil {
            return nil, err
            return false, err
        }
        if len(scheduled) >= wantCount {
            return true, nil
        }
        klog.Infof("%s: namespace %s: got %d pods, want %d", name, namespace, len(scheduled), wantCount)
        return false, nil
    })
}

// waitUntilPodsScheduled blocks until the all pods in the given namespaces are
// scheduled.
func waitUntilPodsScheduled(ctx context.Context, podInformer coreinformers.PodInformer, name string, namespaces []string, numPodsScheduledPerNamespace map[string]int) error {
    // If unspecified, default to all known namespaces.
    if len(namespaces) == 0 {
        for namespace := range numPodsScheduledPerNamespace {
            namespaces = append(namespaces, namespace)
        }
    }
    if pc.PersistentVolumeClaimTemplatePath == nil {
        return testutils.NewCustomCreatePodStrategy(basePod), nil
    }

    pvTemplate, err := getPersistentVolumeSpecFromFile(pc.PersistentVolumeTemplatePath)
    if err != nil {
        return nil, err
    }
    pvcTemplate, err := getPersistentVolumeClaimSpecFromFile(pc.PersistentVolumeClaimTemplatePath)
    if err != nil {
        return nil, err
    }
    return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
}

func parseTestCases(path string) ([]testCase, error) {
    var simpleTests []simpleTestCases
    if err := getSpecFromFile(&path, &simpleTests); err != nil {
        return nil, fmt.Errorf("parsing test cases: %v", err)
    }

    testCases := make([]testCase, 0)
    for _, s := range simpleTests {
        testCase := s.Template
        for _, p := range s.Params {
            testCase.Nodes.Num = p.NumNodes
            testCase.InitPods = append([]podCase(nil), testCase.InitPods...)
            for i, v := range p.NumInitPods {
                testCase.InitPods[i].Num = v
            }
            testCase.PodsToSchedule.Num = p.NumPodsToSchedule
            testCases = append(testCases, testCase)
    for _, namespace := range namespaces {
        select {
        case <-ctx.Done():
            return ctx.Err()
        default:
        }
        wantCount, ok := numPodsScheduledPerNamespace[namespace]
        if !ok {
            return fmt.Errorf("unknown namespace %s", namespace)
        }
        if err := waitUntilPodsScheduledInNamespace(ctx, podInformer, name, namespace, wantCount); err != nil {
            return fmt.Errorf("error waiting for pods in namespace %q: %w", namespace, err)
        }
    }

    return testCases, nil
}

func getNodeSpecFromFile(path *string) (*v1.Node, error) {
    nodeSpec := &v1.Node{}
    if err := getSpecFromFile(path, nodeSpec); err != nil {
        return nil, fmt.Errorf("parsing Node: %v", err)
    }
    return nodeSpec, nil
}

func getPodSpecFromFile(path *string) (*v1.Pod, error) {
    podSpec := &v1.Pod{}
    if err := getSpecFromFile(path, podSpec); err != nil {
        return nil, fmt.Errorf("parsing Pod: %v", err)
    }
    return podSpec, nil
}

func getPersistentVolumeSpecFromFile(path *string) (*v1.PersistentVolume, error) {
    persistentVolumeSpec := &v1.PersistentVolume{}
    if err := getSpecFromFile(path, persistentVolumeSpec); err != nil {
        return nil, fmt.Errorf("parsing PersistentVolume: %v", err)
    }
    return persistentVolumeSpec, nil
}

func getPersistentVolumeClaimSpecFromFile(path *string) (*v1.PersistentVolumeClaim, error) {
    persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
    if err := getSpecFromFile(path, persistentVolumeClaimSpec); err != nil {
        return nil, fmt.Errorf("parsing PersistentVolumeClaim: %v", err)
    }
    return persistentVolumeClaimSpec, nil
    return nil
}

func getSpecFromFile(path *string, spec interface{}) error {
@@ -342,7 +528,95 @@ func getSpecFromFile(path *string, spec interface{}) error {
    if err != nil {
        return err
    }
    return yaml.Unmarshal(bytes, spec)
    return yaml.UnmarshalStrict(bytes, spec)
}

func getTestCases(path string) ([]*testCase, error) {
    testCases := make([]*testCase, 0)
    if err := getSpecFromFile(&path, &testCases); err != nil {
        return nil, fmt.Errorf("parsing test cases: %w", err)
    }
    return testCases, nil
}

func validateTestCases(testCases []*testCase) error {
    if len(testCases) == 0 {
        return fmt.Errorf("no test cases defined")
    }
    for _, tc := range testCases {
        if len(tc.Workloads) == 0 {
            return fmt.Errorf("%s: no workloads defined", tc.Name)
        }
        if len(tc.WorkloadTemplate) == 0 {
            return fmt.Errorf("%s: no ops defined", tc.Name)
        }
        // Make sure there's at least one CreatePods op with collectMetrics set to
        // true in each workload. What's the point of running a performance
        // benchmark if no statistics are collected for reporting?
        if !tc.collectsMetrics() {
            return fmt.Errorf("%s: no op in the workload template collects metrics", tc.Name)
        }
        // TODO(#93795): make sure each workload within a test case has a unique
        // name? The name is used to identify the stats in benchmark reports.
        // TODO(#94404): check for unused template parameters? Probably a typo.
    }
    return nil
}

func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
    basePod := makeBasePod()
    if cpo.PodTemplatePath != nil {
        var err error
        basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
        if err != nil {
            return nil, err
        }
    }
    if cpo.PersistentVolumeClaimTemplatePath == nil {
        return testutils.NewCustomCreatePodStrategy(basePod), nil
    }

    pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
    if err != nil {
        return nil, err
    }
    pvcTemplate, err := getPersistentVolumeClaimSpecFromFile(cpo.PersistentVolumeClaimTemplatePath)
    if err != nil {
        return nil, err
    }
    return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
}

func getNodeSpecFromFile(path *string) (*v1.Node, error) {
    nodeSpec := &v1.Node{}
    if err := getSpecFromFile(path, nodeSpec); err != nil {
        return nil, fmt.Errorf("parsing Node: %w", err)
    }
    return nodeSpec, nil
}

func getPodSpecFromFile(path *string) (*v1.Pod, error) {
    podSpec := &v1.Pod{}
    if err := getSpecFromFile(path, podSpec); err != nil {
        return nil, fmt.Errorf("parsing Pod: %w", err)
    }
    return podSpec, nil
}

func getPersistentVolumeSpecFromFile(path *string) (*v1.PersistentVolume, error) {
    persistentVolumeSpec := &v1.PersistentVolume{}
    if err := getSpecFromFile(path, persistentVolumeSpec); err != nil {
        return nil, fmt.Errorf("parsing PersistentVolume: %w", err)
    }
    return persistentVolumeSpec, nil
}

func getPersistentVolumeClaimSpecFromFile(path *string) (*v1.PersistentVolumeClaim, error) {
    persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
    if err := getSpecFromFile(path, persistentVolumeClaimSpec); err != nil {
        return nil, fmt.Errorf("parsing PersistentVolumeClaim: %w", err)
    }
    return persistentVolumeClaimSpec, nil
}

func getCustomVolumeFactory(pvTemplate *v1.PersistentVolume) func(id int) *v1.PersistentVolume {
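Editor's note: the op wrapper above decodes each YAML entry (converted to JSON) by trying every known op type and keeping the first candidate that both unmarshals and passes isValid. Below is a self-contained sketch of that trial-and-error decoding, using plain encoding/json and simplified two-field ops; the names are illustrative and not the harness's actual types.

package main

import (
    "encoding/json"
    "fmt"
)

// Minimal stand-ins for two op kinds; the real ops carry more fields.
type createNodesOp struct {
    Opcode string
    Count  int
}

func (o *createNodesOp) isValid() error {
    if o.Opcode != "createNodes" {
        return fmt.Errorf("invalid opcode %q", o.Opcode)
    }
    return nil
}

type barrierOp struct {
    Opcode     string
    Namespaces []string
}

func (o *barrierOp) isValid() error {
    if o.Opcode != "barrier" {
        return fmt.Errorf("invalid opcode %q", o.Opcode)
    }
    return nil
}

// decodeOp tries each known op type in turn and keeps the first one that both
// unmarshals and validates, the same trial-and-error idea as op.UnmarshalJSON.
func decodeOp(b []byte) (interface{}, error) {
    type validator interface{ isValid() error }
    for _, candidate := range []validator{&createNodesOp{}, &barrierOp{}} {
        if err := json.Unmarshal(b, candidate); err == nil && candidate.isValid() == nil {
            return candidate, nil
        }
    }
    return nil, fmt.Errorf("cannot unmarshal %s into any known op type", string(b))
}

func main() {
    op, err := decodeOp([]byte(`{"opcode": "createNodes", "count": 500}`))
    fmt.Printf("%#v err=%v\n", op, err)
}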
@@ -17,6 +17,7 @@ limitations under the License.
package benchmark

import (
    "context"
    "encoding/json"
    "flag"
    "fmt"
@@ -147,18 +148,18 @@ type metricsCollectorConfig struct {
// metricsCollector collects metrics from legacyregistry.DefaultGatherer.Gather() endpoint.
// Currently only Histrogram metrics are supported.
type metricsCollector struct {
    metricsCollectorConfig
    *metricsCollectorConfig
    labels map[string]string
}

func newMetricsCollector(config metricsCollectorConfig, labels map[string]string) *metricsCollector {
func newMetricsCollector(config *metricsCollectorConfig, labels map[string]string) *metricsCollector {
    return &metricsCollector{
        metricsCollectorConfig: config,
        labels: labels,
    }
}

func (*metricsCollector) run(stopCh chan struct{}) {
func (*metricsCollector) run(ctx context.Context) {
    // metricCollector doesn't need to start before the tests, so nothing to do here.
}

@@ -231,7 +232,7 @@ func newThroughputCollector(podInformer coreinformers.PodInformer, labels map[st
    }
}

func (tc *throughputCollector) run(stopCh chan struct{}) {
func (tc *throughputCollector) run(ctx context.Context) {
    podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...)
    if err != nil {
        klog.Fatalf("%v", err)
@@ -239,8 +240,9 @@ func (tc *throughputCollector) run(stopCh chan struct{}) {
    lastScheduledCount := len(podsScheduled)
    for {
        select {
        case <-stopCh:
        case <-ctx.Done():
            return
        // TODO(#94665): use time.Ticker instead
        case <-time.After(throughputSampleFrequency):
            podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...)
            if err != nil {
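Editor's note: the collectors above switch from a stop channel to a context. A minimal stand-alone sketch of the resulting polling-loop shape (generic code, not the PR's collector):

package main

import (
    "context"
    "fmt"
    "time"
)

// sample polls on a fixed interval until the context is cancelled, the same
// shape as the throughput collector's loop after the stopCh -> ctx change.
func sample(ctx context.Context, every time.Duration) {
    for {
        select {
        case <-ctx.Done():
            return
        case <-time.After(every):
            fmt.Println("take a sample")
        }
    }
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    go sample(ctx, 100*time.Millisecond)
    time.Sleep(350 * time.Millisecond)
    cancel() // replaces close(stopCh) in the old API
    time.Sleep(50 * time.Millisecond)
}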
@@ -931,7 +931,7 @@ type CountToStrategy struct {
}

type TestNodePreparer interface {
    PrepareNodes() error
    PrepareNodes(nextNodeIndex int) error
    CleanupNodes() error
}
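Editor's note: PrepareNodes now takes the index to start from, so each createNodes op in a workload prepares a fresh slice of nodes instead of starting over at zero. A toy preparer illustrating the contract (the fakePreparer here is hypothetical, not the real IntegrationTestNodePreparer):

package main

import "fmt"

// A toy implementation of the updated TestNodePreparer contract: each call
// starts preparing at nextNodeIndex so that successive createNodes ops in one
// workload do not re-prepare (or clash with) nodes from earlier ops.
type fakePreparer struct {
    count int // nodes this op should prepare
}

func (p *fakePreparer) PrepareNodes(nextNodeIndex int) error {
    for i := 0; i < p.count; i++ {
        fmt.Printf("preparing node index %d\n", nextNodeIndex+i)
    }
    return nil
}

func (p *fakePreparer) CleanupNodes() error { return nil }

func main() {
    nextNodeIndex := 0
    for _, op := range []*fakePreparer{{count: 3}, {count: 2}} {
        if err := op.PrepareNodes(nextNodeIndex); err != nil {
            panic(err)
        }
        nextNodeIndex += op.count // mirrors nextNodeIndex += concreteOp.Count in runWorkload
    }
}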