Merge pull request #93252 from adtac/scheduler-perf

scheduler_perf: refactor to allow arbitrary workloads
Authored by Kubernetes Prow Robot on 2020-09-17 14:08:46 -07:00; committed by GitHub
commit ff1d6e8c1d
8 changed files with 930 additions and 488 deletions

View File

@@ -58,7 +58,7 @@ func NewIntegrationTestNodePreparerWithNodeSpec(client clientset.Interface, coun
 }
 
 // PrepareNodes prepares countToStrategy test nodes.
-func (p *IntegrationTestNodePreparer) PrepareNodes() error {
+func (p *IntegrationTestNodePreparer) PrepareNodes(nextNodeIndex int) error {
 	numNodes := 0
 	for _, v := range p.countToStrategy {
 		numNodes += v.Count
@@ -103,11 +103,9 @@ func (p *IntegrationTestNodePreparer) PrepareNodes() error {
 	if err != nil {
 		klog.Fatalf("Error listing nodes: %v", err)
 	}
-	index := 0
-	sum := 0
+	index := nextNodeIndex
 	for _, v := range p.countToStrategy {
-		sum += v.Count
-		for ; index < sum; index++ {
+		for i := 0; i < v.Count; i, index = i+1, index+1 {
 			if err := testutils.DoPrepareNode(p.client, &nodes.Items[index], v.Strategy); err != nil {
 				klog.Errorf("Aborting node preparation: %v", err)
 				return err
@@ -119,14 +117,18 @@ func (p *IntegrationTestNodePreparer) PrepareNodes() error {
 // CleanupNodes deletes existing test nodes.
 func (p *IntegrationTestNodePreparer) CleanupNodes() error {
+	// TODO(#93794): make CleanupNodes only clean up the nodes created by this
+	// IntegrationTestNodePreparer to make this more intuitive.
 	nodes, err := GetReadySchedulableNodes(p.client)
 	if err != nil {
 		klog.Fatalf("Error listing nodes: %v", err)
 	}
+	var errRet error
 	for i := range nodes.Items {
 		if err := p.client.CoreV1().Nodes().Delete(context.TODO(), nodes.Items[i].Name, metav1.DeleteOptions{}); err != nil {
 			klog.Errorf("Error while deleting Node: %v", err)
+			errRet = err
 		}
 	}
-	return nil
+	return errRet
 }
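Illustration only (not part of this PR): PrepareNodes now takes a starting index so that several createNodes ops in one workload can prepare disjoint slices of the node list. The following standalone sketch, with a hypothetical countToStrategy type and string "nodes" instead of real Node objects, shows the same index arithmetic as the new loop and how the caller would chain successive ops.

package main

import "fmt"

// countToStrategy mirrors the shape used by the preparer: prepare Count nodes
// with a given strategy. The strategy is just a name here for brevity.
type countToStrategy struct {
	Count    int
	Strategy string
}

// prepareNodes walks the node list starting at nextNodeIndex, exactly like the
// updated loop `for i := 0; i < v.Count; i, index = i+1, index+1`, and returns
// the index one past the last node it touched so the next op can continue there.
func prepareNodes(nodes []string, nextNodeIndex int, strategies []countToStrategy) int {
	index := nextNodeIndex
	for _, v := range strategies {
		for i := 0; i < v.Count; i, index = i+1, index+1 {
			fmt.Printf("preparing %s with strategy %q\n", nodes[index], v.Strategy)
		}
	}
	return index
}

func main() {
	nodes := []string{"node-0", "node-1", "node-2", "node-3", "node-4"}
	// First createNodes op prepares nodes[0:3]; the second one starts where it left off.
	next := prepareNodes(nodes, 0, []countToStrategy{{Count: 3, Strategy: "trivial"}})
	prepareNodes(nodes, next, []countToStrategy{{Count: 2, Strategy: "labels"}})
}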

View File

@@ -0,0 +1 @@
+BenchmarkPerfScheduling_*.json

View File

@@ -48,6 +48,7 @@ go_test(
         "//staging/src/k8s.io/api/storage/v1beta1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
         "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",

View File

@@ -1,264 +1,426 @@
-- template:
-    desc: SchedulingBasic
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-default.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPodAntiAffinity
-    nodes:
-      uniqueNodeLabelStrategy:
-        labelKey: kubernetes.io/hostname
-    initPods:
-      - podTemplatePath: config/pod-with-pod-anti-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-pod-anti-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [100]
-      numPodsToSchedule: 400
-    - numNodes: 5000
-      numInitPods: [1000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingSecrets
-    initPods:
-      - podTemplatePath: config/pod-with-secret-volume.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-secret-volume.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingInTreePVs
-    initPods:
-      - persistentVolumeTemplatePath: config/pv-aws.yaml
-        persistentVolumeClaimTemplatePath: config/pvc.yaml
-    podsToSchedule:
-      persistentVolumeTemplatePath: config/pv-aws.yaml
-      persistentVolumeClaimTemplatePath: config/pvc.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingMigratedInTreePVs
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      nodeAllocatableStrategy:
-        nodeAllocatable:
-          attachable-volumes-csi-ebs.csi.aws.com: 39
-        csiNodeAllocatable:
-          ebs.csi.aws.com:
-            count: 39
-        migratedPlugins:
-          - "kubernetes.io/aws-ebs"
-    initPods:
-      - persistentVolumeTemplatePath: config/pv-aws.yaml
-        persistentVolumeClaimTemplatePath: config/pvc.yaml
-    podsToSchedule:
-      persistentVolumeTemplatePath: config/pv-aws.yaml
-      persistentVolumeClaimTemplatePath: config/pvc.yaml
-    featureGates:
-      CSIMigration: true
-      CSIMigrationAWS: true
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingCSIPVs
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      nodeAllocatableStrategy:
-        nodeAllocatable:
-          attachable-volumes-csi-ebs.csi.aws.com: 39
-        csiNodeAllocatable:
-          ebs.csi.aws.com:
-            count: 39
-    initPods:
-      - persistentVolumeTemplatePath: config/pv-csi.yaml
-        persistentVolumeClaimTemplatePath: config/pvc.yaml
-    podsToSchedule:
-      persistentVolumeTemplatePath: config/pv-csi.yaml
-      persistentVolumeClaimTemplatePath: config/pvc.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPodAffinity
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "failure-domain.beta.kubernetes.io/zone"
-        labelValues: ["zone1"]
-    initPods:
-      - podTemplatePath: config/pod-with-pod-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-pod-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPreferredPodAffinity
-    nodes:
-      uniqueNodeLabelStrategy:
-        labelKey: kubernetes.io/hostname
-    initPods:
-      - podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPreferredPodAntiAffinity
-    nodes:
-      uniqueNodeLabelStrategy:
-        labelKey: kubernetes.io/hostname
-    initPods:
-      - podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingNodeAffinity
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "failure-domain.beta.kubernetes.io/zone"
-        labelValues: ["zone1"]
-    initPods:
-      - podTemplatePath: config/pod-with-node-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-node-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: TopologySpreading
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "topology.kubernetes.io/zone"
-        labelValues: ["moon-1", "moon-2", "moon-3"]
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-topology-spreading.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [1000]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 2000
-- template:
-    desc: PreferredTopologySpreading
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "topology.kubernetes.io/zone"
-        labelValues: ["moon-1", "moon-2", "moon-3"]
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-preferred-topology-spreading.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [1000]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 2000
-- template:
-    desc: MixedSchedulingBasePod
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "topology.kubernetes.io/zone"
-        labelValues: ["zone1"]
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-      - podTemplatePath: config/pod-with-pod-affinity.yaml
-      - podTemplatePath: config/pod-with-pod-anti-affinity.yaml
-      - podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
-      - podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-default.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [200, 200, 200, 200, 200]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [2000, 2000, 2000, 2000, 2000]
-      numPodsToSchedule: 1000
-- template:
-    desc: Preemption
-    initPods:
-      - podTemplatePath: config/pod-low-priority.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-high-priority.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [2000]
-      numPodsToSchedule: 500
-    - numNodes: 5000
-      numInitPods: [20000]
-      numPodsToSchedule: 5000
-- template:
-    desc: Unschedulable
-    skipWaitUntilInitPodsScheduled: true
-    initPods:
-      - podTemplatePath: config/pod-large-cpu.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-default.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [200]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [200]
-      numPodsToSchedule: 5000
-    - numNodes: 5000
-      numInitPods: [2000]
-      numPodsToSchedule: 5000
+- name: SchedulingBasic
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-default.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 1000
+      measurePods: 1000
+- name: SchedulingPodAntiAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    uniqueNodeLabelStrategy:
+      labelKey: kubernetes.io/hostname
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
+    collectMetrics: true
+    namespace: sched-test
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 100
+      measurePods: 400
+  - name: 5000Nodes
+    params:
+      initNodes: 500
+      initPods: 100
+      measurePods: 400
+- name: SchedulingSecrets
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-secret-volume.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-secret-volume.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingInTreePVs
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingMigratedInTreePVs
+  featureGates:
+    CSIMigration: true
+    CSIMigrationAWS: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    nodeAllocatableStrategy:
+      nodeAllocatable:
+        attachable-volumes-csi-ebs.csi.aws.com: "39"
+      csiNodeAllocatable:
+        ebs.csi.aws.com:
+          count: 39
+      migratedPlugins:
+      - "kubernetes.io/aws-ebs"
+  - opcode: createPods
+    countParam: $initPods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingCSIPVs
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    nodeAllocatableStrategy:
+      nodeAllocatable:
+        attachable-volumes-csi-ebs.csi.aws.com: "39"
+      csiNodeAllocatable:
+        ebs.csi.aws.com:
+          count: 39
+  - opcode: createPods
+    countParam: $initPods
+    persistentVolumeTemplatePath: config/pv-csi.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    persistentVolumeTemplatePath: config/pv-csi.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingPodAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "failure-domain.beta.kubernetes.io/zone"
+      labelValues: ["zone1"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-pod-affinity.yaml
+    namespace: sched-test
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingPreferredPodAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    uniqueNodeLabelStrategy:
+      labelKey: kubernetes.io/hostname
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
+    namespace: sched-test
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingPreferredPodAntiAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    uniqueNodeLabelStrategy:
+      labelKey: kubernetes.io/hostname
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
+    namespace: sched-test
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingNodeAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "failure-domain.beta.kubernetes.io/zone"
+      labelValues: ["zone1"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-node-affinity.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-node-affinity.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: TopologySpreading
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "topology.kubernetes.io/zone"
+      labelValues: ["moon-1", "moon-2", "moon-3"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-topology-spreading.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 1000
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 2000
+- name: PreferredTopologySpreading
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "topology.kubernetes.io/zone"
+      labelValues: ["moon-1", "moon-2", "moon-3"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-preferred-topology-spreading.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 1000
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 2000
+- name: MixedSchedulingBasePod
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "topology.kubernetes.io/zone"
+      labelValues: ["zone1"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-default.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 200
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 2000
+      measurePods: 1000
+- name: Preemption
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-low-priority.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-high-priority.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 2000
+      measurePods: 500
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 20000
+      measurePods: 5000
+- name: Unschedulable
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-large-cpu.yaml
+    skipWaitToCompletion: true
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-default.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes/200InitPods
+    params:
+      initNodes: 500
+      initPods: 200
+      measurePods: 1000
+  - name: 5000Nodes/200InitPods
+    params:
+      initNodes: 5000
+      initPods: 200
+      measurePods: 5000
+  - name: 5000Nodes/2000InitPods
+    params:
+      initNodes: 5000
+      initPods: 2000
+      measurePods: 5000
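Illustration only (not part of this PR): in the new config, `countParam` values such as `$initNodes` are resolved per workload by stripping the leading `$` and looking the name up in that workload's `params` map (this is what the `patchParams` methods further down in this diff do). The standalone Go sketch below, with a hypothetical `resolveCountParam` helper, shows only that substitution rule.

package main

import (
	"fmt"
	"strings"
)

// resolveCountParam mimics the substitution rule of the new config format: a
// countParam is valid only if it starts with "$", and it resolves to the value
// stored under the key with the "$" stripped (e.g. "$initNodes" -> params["initNodes"]).
func resolveCountParam(countParam string, params map[string]int) (int, error) {
	if !strings.HasPrefix(countParam, "$") {
		return 0, fmt.Errorf("%q is not parameterizable", countParam)
	}
	count, ok := params[countParam[1:]]
	if !ok {
		return 0, fmt.Errorf("parameter %s is undefined", countParam)
	}
	return count, nil
}

func main() {
	// Params of the 500Nodes workload of SchedulingBasic above.
	params := map[string]int{"initNodes": 500, "initPods": 500, "measurePods": 1000}
	n, err := resolveCountParam("$initNodes", params)
	fmt.Println(n, err) // 500 <nil>
}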

View File

@@ -448,7 +448,7 @@ func benchmarkScheduling(numExistingPods, minPods int,
 		clientset,
 		nodeStrategies,
 		"scheduler-perf-")
-	if err := nodePreparer.PrepareNodes(); err != nil {
+	if err := nodePreparer.PrepareNodes(0); err != nil {
 		klog.Fatalf("%v", err)
 	}
 	defer nodePreparer.CleanupNodes()

View File

@@ -17,12 +17,17 @@ limitations under the License.
 package benchmark
 
 import (
+	"context"
+	"encoding/json"
 	"fmt"
 	"io/ioutil"
+	"strings"
+	"sync"
 	"testing"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
@@ -35,7 +40,10 @@ import (
 )
 
 const (
 	configFile = "config/performance-config.yaml"
+
+	createNodesOpcode = "createNodes"
+	createPodsOpcode  = "createPods"
+	barrierOpcode     = "barrier"
 )
 
 var (
@@ -51,87 +59,248 @@ var (
 	}
 )
 
-// testCase configures a test case to run the scheduler performance test. Users should be able to
-// provide this via a YAML file.
-//
-// It specifies nodes and pods in the cluster before running the test. It also specifies the pods to
-// schedule during the test. The config can be as simple as just specify number of nodes/pods, where
-// default spec will be applied. It also allows the user to specify a pod spec template for more
-// complicated test cases.
-//
-// It also specifies the metrics to be collected after the test. If nothing is specified, default metrics
-// such as scheduling throughput and latencies will be collected.
-type testCase struct {
-	// description of the test case
-	Desc string
-	// configures nodes in the cluster
-	Nodes nodeCase
-	// configures pods in the cluster before running the tests
-	InitPods []podCase
-	// configures the test to now wait for init pods to schedule before creating
-	// test pods.
-	SkipWaitUntilInitPodsScheduled bool
-	// pods to be scheduled during the test.
-	PodsToSchedule podCase
-	// optional, feature gates to set before running the test
-	FeatureGates map[featuregate.Feature]bool
-	// optional, replaces default defaultMetricsCollectorConfig if supplied.
-	MetricsCollectorConfig *metricsCollectorConfig
-}
-
-type nodeCase struct {
-	Num              int
-	NodeTemplatePath *string
-	// At most one of the following strategies can be defined. If not specified, default to TrivialNodePrepareStrategy.
-	NodeAllocatableStrategy  *testutils.NodeAllocatableStrategy
-	LabelNodePrepareStrategy *testutils.LabelNodePrepareStrategy
-	UniqueNodeLabelStrategy  *testutils.UniqueNodeLabelStrategy
-}
-
-type podCase struct {
-	Num                               int
-	PodTemplatePath                   *string
-	PersistentVolumeTemplatePath      *string
-	PersistentVolumeClaimTemplatePath *string
-}
-
-// simpleTestCases defines a set of test cases that share the same template (node spec, pod spec, etc)
-// with testParams(e.g., NumNodes) being overridden. This provides a convenient way to define multiple tests
-// with various sizes.
-type simpleTestCases struct {
-	Template testCase
-	Params   []testParams
-}
-
-type testParams struct {
-	NumNodes          int
-	NumInitPods       []int
-	NumPodsToSchedule int
-}
-
-type testDataCollector interface {
-	run(stopCh chan struct{})
-	collect() []DataItem
-}
+// testCase defines a set of test cases that intend to test the performance of
+// similar workloads of varying sizes with shared overall settings such as
+// feature gates and metrics collected.
+type testCase struct {
+	// Name of the testCase.
+	Name string
+	// Feature gates to set before running the test. Optional.
+	FeatureGates map[featuregate.Feature]bool
+	// List of metrics to collect. Optional, defaults to
+	// defaultMetricsCollectorConfig if unspecified.
+	MetricsCollectorConfig *metricsCollectorConfig
+	// Template for sequence of ops that each workload must follow. Each op will
+	// be executed serially one after another. Each element of the list must be
+	// createNodesOp, createPodsOp, or barrierOp.
+	WorkloadTemplate []op
+	// List of workloads to run under this testCase.
+	Workloads []*workload
+	// TODO(#93792): reduce config toil by having a default pod and node spec per
+	// testCase? CreatePods and CreateNodes ops will inherit these unless
+	// manually overridden.
+}
+
+func (tc *testCase) collectsMetrics() bool {
+	for _, op := range tc.WorkloadTemplate {
+		if op.realOp.collectsMetrics() {
+			return true
+		}
+	}
+	return false
+}
+
+// workload is a subtest under a testCase that tests the scheduler performance
+// for a certain ordering of ops. The set of nodes created and pods scheduled
+// in a workload may be heterogenous.
+type workload struct {
+	// Name of the workload.
+	Name string
+	// Values of parameters used in the workloadTemplate.
+	Params map[string]int
+}
+
+// op is a dummy struct which stores the real op in itself.
+type op struct {
+	realOp realOp
+}
+
+// UnmarshalJSON is a custom unmarshaler for the op struct since we don't know
+// which op we're decoding at runtime.
+func (op *op) UnmarshalJSON(b []byte) error {
+	possibleOps := []realOp{
+		&createNodesOp{},
+		&createPodsOp{},
+		&barrierOp{},
+		// TODO(#93793): add a sleep timer op to simulate waiting?
+		// TODO(#94601): add a delete nodes op to simulate scaling behaviour?
+	}
+	var firstError error
+	for _, possibleOp := range possibleOps {
+		if err := json.Unmarshal(b, possibleOp); err == nil {
+			if err2 := possibleOp.isValid(true); err2 == nil {
+				op.realOp = possibleOp
+				return nil
+			} else if firstError == nil {
+				// Don't return an error yet. Even though this op is invalid, it may
+				// still match other possible ops.
+				firstError = err2
+			}
+		}
+	}
+	return fmt.Errorf("cannot unmarshal %s into any known op type: %w", string(b), firstError)
+}
+
+// realOp is an interface that is implemented by different structs. To evaluate
+// the validity of ops at parse-time, a isValid function must be implemented.
+type realOp interface {
+	// isValid verifies the validity of the op args such as node/pod count. Note
+	// that we don't catch undefined parameters at this stage.
+	isValid(allowParameterization bool) error
+	// collectsMetrics checks if the op collects metrics.
+	collectsMetrics() bool
+	// patchParams returns a patched realOp of the same type after substituting
+	// parameterizable values with workload-specific values. One should implement
+	// this method on the value receiver base type, not a pointer receiver base
+	// type, even though calls will be made from with a *realOp. This is because
+	// callers don't want the receiver to inadvertently modify the realOp
+	// (instead, it's returned as a return value).
+	patchParams(w *workload) (realOp, error)
+}
+
+func isValidParameterizable(val string) bool {
+	return strings.HasPrefix(val, "$")
+}
+
+// createNodesOp defines an op where nodes are created as a part of a workload.
+type createNodesOp struct {
+	// Must be "createNodes".
+	Opcode string
+	// Number of nodes to create. Parameterizable through CountParam.
+	Count int
+	// Template parameter for Count.
+	CountParam string
+	// Path to spec file describing the nodes to create. Optional.
+	NodeTemplatePath *string
+	// At most one of the following strategies can be defined. Optional, defaults
+	// to TrivialNodePrepareStrategy if unspecified.
+	NodeAllocatableStrategy  *testutils.NodeAllocatableStrategy
+	LabelNodePrepareStrategy *testutils.LabelNodePrepareStrategy
+	UniqueNodeLabelStrategy  *testutils.UniqueNodeLabelStrategy
+}
+
+func (cno *createNodesOp) isValid(allowParameterization bool) error {
+	if cno.Opcode != createNodesOpcode {
+		return fmt.Errorf("invalid opcode")
+	}
+	ok := (cno.Count > 0 ||
+		(cno.CountParam != "" && allowParameterization && isValidParameterizable(cno.CountParam)))
+	if !ok {
+		return fmt.Errorf("invalid Count=%d / CountParam=%q", cno.Count, cno.CountParam)
+	}
+	return nil
+}
+
+func (*createNodesOp) collectsMetrics() bool {
+	return false
+}
+
+func (cno createNodesOp) patchParams(w *workload) (realOp, error) {
+	if cno.CountParam != "" {
+		var ok bool
+		if cno.Count, ok = w.Params[cno.CountParam[1:]]; !ok {
+			return nil, fmt.Errorf("parameter %s is undefined", cno.CountParam)
+		}
+	}
+	return &cno, (&cno).isValid(false)
+}
+
+// createPodsOp defines an op where pods are scheduled as a part of a workload.
+// The test can block on the completion of this op before moving forward or
+// continue asynchronously.
+type createPodsOp struct {
+	// Must be "createPods".
+	Opcode string
+	// Number of pods to schedule. Parameterizable through CountParam.
+	Count int
+	// Template parameter for Count.
+	CountParam string
+	// Whether or not to enable metrics collection for this createPodsOp.
+	// Optional. Both CollectMetrics and SkipWaitToCompletion cannot be true at
+	// the same time for a particular createPodsOp.
+	CollectMetrics bool
+	// Namespace the pods should be created in. Optional, defaults to a unique
+	// namespace of the format "namespace-<number>".
+	Namespace *string
+	// Path to spec file describing the pods to schedule. Optional.
+	PodTemplatePath *string
+	// Whether or not to wait for all pods in this op to get scheduled. Optional,
+	// defaults to false.
+	SkipWaitToCompletion bool
+	// Persistent volume settings for the pods to be scheduled. Optional.
+	PersistentVolumeTemplatePath      *string
+	PersistentVolumeClaimTemplatePath *string
+}
+
+func (cpo *createPodsOp) isValid(allowParameterization bool) error {
+	if cpo.Opcode != createPodsOpcode {
+		return fmt.Errorf("invalid opcode")
+	}
+	ok := (cpo.Count > 0 ||
+		(cpo.CountParam != "" && allowParameterization && isValidParameterizable(cpo.CountParam)))
+	if !ok {
+		return fmt.Errorf("invalid Count=%d / CountParam=%q", cpo.Count, cpo.CountParam)
+	}
+	if cpo.CollectMetrics && cpo.SkipWaitToCompletion {
+		// While it's technically possible to achieve this, the additional
+		// complexity is not worth it, especially given that we don't have any
+		// use-cases right now.
+		return fmt.Errorf("collectMetrics and skipWaitToCompletion cannot be true at the same time")
+	}
+	return nil
+}
+
+func (cpo *createPodsOp) collectsMetrics() bool {
+	return cpo.CollectMetrics
+}
+
+func (cpo createPodsOp) patchParams(w *workload) (realOp, error) {
+	if cpo.CountParam != "" {
+		var ok bool
+		if cpo.Count, ok = w.Params[cpo.CountParam[1:]]; !ok {
+			return nil, fmt.Errorf("parameter %s is undefined", cpo.CountParam)
+		}
+	}
+	return &cpo, (&cpo).isValid(false)
+}
+
+// barrierOp defines an op that can be used to wait until all scheduled pods of
+// one or many namespaces have been bound to nodes. This is useful when pods
+// were scheduled with SkipWaitToCompletion set to true. A barrierOp is added
+// at the end of each each workload automatically.
+type barrierOp struct {
+	// Must be "barrier".
+	Opcode string
+	// Namespaces to block on. Empty array or not specifying this field signifies
+	// that the barrier should block on all namespaces.
+	Namespaces []string
+}
+
+func (bo *barrierOp) isValid(allowParameterization bool) error {
+	if bo.Opcode != barrierOpcode {
+		return fmt.Errorf("invalid opcode")
+	}
+	return nil
+}
+
+func (*barrierOp) collectsMetrics() bool {
+	return false
+}
+
+func (bo barrierOp) patchParams(w *workload) (realOp, error) {
+	return &bo, nil
+}
 
 func BenchmarkPerfScheduling(b *testing.B) {
-	dataItems := DataItems{Version: "v1"}
-	tests, err := parseTestCases(configFile)
+	testCases, err := getTestCases(configFile)
 	if err != nil {
 		b.Fatal(err)
 	}
+	if err = validateTestCases(testCases); err != nil {
+		b.Fatal(err)
+	}
 
-	for _, test := range tests {
-		initPods := 0
-		for _, p := range test.InitPods {
-			initPods += p.Num
-		}
-		name := fmt.Sprintf("%v/%vNodes/%vInitPods/%vPodsToSchedule", test.Desc, test.Nodes.Num, initPods, test.PodsToSchedule.Num)
-		b.Run(name, func(b *testing.B) {
-			for feature, flag := range test.FeatureGates {
-				defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
-			}
-			dataItems.DataItems = append(dataItems.DataItems, perfScheduling(test, b)...)
+	dataItems := DataItems{Version: "v1"}
+	for _, tc := range testCases {
+		b.Run(tc.Name, func(b *testing.B) {
+			for _, w := range tc.Workloads {
+				b.Run(w.Name, func(b *testing.B) {
+					for feature, flag := range tc.FeatureGates {
+						defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
+					}
+					dataItems.DataItems = append(dataItems.DataItems, runWorkload(b, tc, w)...)
+				})
+			}
 		})
 	}
 	if err := dataItems2JSONFile(dataItems, b.Name()); err != nil {
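Illustration only (not part of this PR): the sketch below shows how a single op from the YAML config would be decoded and specialized for one workload, using the `op`, `workload` and `createPodsOp` types introduced above. Because those types are unexported, this snippet is assumed to live inside the benchmark package; the function name is hypothetical.

func exampleOpPatching() error {
	raw := []byte(`{"opcode": "createPods", "countParam": "$measurePods", "collectMetrics": true}`)
	var o op
	// op.UnmarshalJSON tries createNodesOp, createPodsOp and barrierOp in turn
	// and keeps the first candidate that passes isValid(true).
	if err := json.Unmarshal(raw, &o); err != nil {
		return err
	}
	// Substitute $measurePods with the workload-specific value.
	w := &workload{Name: "500Nodes", Params: map[string]int{"measurePods": 1000}}
	realOp, err := o.realOp.patchParams(w)
	if err != nil {
		return err
	}
	cpo := realOp.(*createPodsOp)
	fmt.Printf("will create %d pods, collectMetrics=%v\n", cpo.Count, cpo.CollectMetrics)
	return nil
}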
@@ -139,202 +308,219 @@ func BenchmarkPerfScheduling(b *testing.B) {
 	}
 }
 
-func perfScheduling(test testCase, b *testing.B) []DataItem {
-	finalFunc, podInformer, clientset := mustSetupScheduler()
-	defer finalFunc()
-
-	nodePreparer, err := getNodePreparer(test.Nodes, clientset)
-	if err != nil {
-		b.Fatal(err)
-	}
-	if err := nodePreparer.PrepareNodes(); err != nil {
-		b.Fatal(err)
-	}
-	defer nodePreparer.CleanupNodes()
-
-	total := 0
-	for _, p := range test.InitPods {
-		if err := createPods(setupNamespace, p, clientset); err != nil {
-			b.Fatal(err)
-		}
-		total += p.Num
-	}
-	if !test.SkipWaitUntilInitPodsScheduled {
-		if err := waitNumPodsScheduled(b, total, podInformer, setupNamespace); err != nil {
-			b.Fatal(err)
-		}
-	}
-
-	// start benchmark
-	b.ResetTimer()
-
-	// Start test data collectors.
-	stopCh := make(chan struct{})
-	collectors := getTestDataCollectors(test, podInformer, b)
-	for _, collector := range collectors {
-		go collector.run(stopCh)
-	}
-
-	// Schedule the main workload
-	if err := createPods(testNamespace, test.PodsToSchedule, clientset); err != nil {
-		b.Fatal(err)
-	}
-	if err := waitNumPodsScheduled(b, test.PodsToSchedule.Num, podInformer, testNamespace); err != nil {
-		b.Fatal(err)
-	}
-
-	close(stopCh)
-	// Note: without this line we're taking the overhead of defer() into account.
-	b.StopTimer()
-
-	var dataItems []DataItem
-	for _, collector := range collectors {
-		dataItems = append(dataItems, collector.collect()...)
-	}
-	return dataItems
-}
-
-func waitNumPodsScheduled(b *testing.B, num int, podInformer coreinformers.PodInformer, namespace string) error {
-	for {
-		scheduled, err := getScheduledPods(podInformer, namespace)
-		if err != nil {
-			return err
-		}
-		if len(scheduled) >= num {
-			break
-		}
-		klog.Infof("%s: got %d existing pods, required: %d", b.Name(), len(scheduled), num)
-		time.Sleep(1 * time.Second)
-	}
-	return nil
-}
-
-func getTestDataCollectors(tc testCase, podInformer coreinformers.PodInformer, b *testing.B) []testDataCollector {
-	collectors := []testDataCollector{newThroughputCollector(podInformer, map[string]string{"Name": b.Name()}, []string{testNamespace})}
-	metricsCollectorConfig := defaultMetricsCollectorConfig
-	if tc.MetricsCollectorConfig != nil {
-		metricsCollectorConfig = *tc.MetricsCollectorConfig
-	}
-	collectors = append(collectors, newMetricsCollector(metricsCollectorConfig, map[string]string{"Name": b.Name()}))
-	return collectors
-}
-
-func getNodePreparer(nc nodeCase, clientset clientset.Interface) (testutils.TestNodePreparer, error) {
-	var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
-	if nc.NodeAllocatableStrategy != nil {
-		nodeStrategy = nc.NodeAllocatableStrategy
-	} else if nc.LabelNodePrepareStrategy != nil {
-		nodeStrategy = nc.LabelNodePrepareStrategy
-	} else if nc.UniqueNodeLabelStrategy != nil {
-		nodeStrategy = nc.UniqueNodeLabelStrategy
-	}
-
-	if nc.NodeTemplatePath != nil {
-		node, err := getNodeSpecFromFile(nc.NodeTemplatePath)
-		if err != nil {
-			return nil, err
-		}
-		return framework.NewIntegrationTestNodePreparerWithNodeSpec(
-			clientset,
-			[]testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
-			node,
-		), nil
-	}
-	return framework.NewIntegrationTestNodePreparer(
-		clientset,
-		[]testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
-		"scheduler-perf-",
-	), nil
-}
-
-func createPods(ns string, pc podCase, clientset clientset.Interface) error {
-	strategy, err := getPodStrategy(pc)
-	if err != nil {
-		return err
-	}
-	config := testutils.NewTestPodCreatorConfig()
-	config.AddStrategy(ns, pc.Num, strategy)
-	podCreator := testutils.NewTestPodCreator(clientset, config)
-	return podCreator.CreatePods()
-}
-
-func getPodStrategy(pc podCase) (testutils.TestPodCreateStrategy, error) {
-	basePod := makeBasePod()
-	if pc.PodTemplatePath != nil {
-		var err error
-		basePod, err = getPodSpecFromFile(pc.PodTemplatePath)
-		if err != nil {
-			return nil, err
-		}
-	}
-	if pc.PersistentVolumeClaimTemplatePath == nil {
-		return testutils.NewCustomCreatePodStrategy(basePod), nil
-	}
-
-	pvTemplate, err := getPersistentVolumeSpecFromFile(pc.PersistentVolumeTemplatePath)
-	if err != nil {
-		return nil, err
-	}
-	pvcTemplate, err := getPersistentVolumeClaimSpecFromFile(pc.PersistentVolumeClaimTemplatePath)
-	if err != nil {
-		return nil, err
-	}
-	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
-}
-
-func parseTestCases(path string) ([]testCase, error) {
-	var simpleTests []simpleTestCases
-	if err := getSpecFromFile(&path, &simpleTests); err != nil {
-		return nil, fmt.Errorf("parsing test cases: %v", err)
-	}
-
-	testCases := make([]testCase, 0)
-	for _, s := range simpleTests {
-		testCase := s.Template
-		for _, p := range s.Params {
-			testCase.Nodes.Num = p.NumNodes
-			testCase.InitPods = append([]podCase(nil), testCase.InitPods...)
-			for i, v := range p.NumInitPods {
-				testCase.InitPods[i].Num = v
-			}
-			testCase.PodsToSchedule.Num = p.NumPodsToSchedule
-			testCases = append(testCases, testCase)
-		}
-	}
-
-	return testCases, nil
-}
-
-func getNodeSpecFromFile(path *string) (*v1.Node, error) {
-	nodeSpec := &v1.Node{}
-	if err := getSpecFromFile(path, nodeSpec); err != nil {
-		return nil, fmt.Errorf("parsing Node: %v", err)
-	}
-	return nodeSpec, nil
-}
-
-func getPodSpecFromFile(path *string) (*v1.Pod, error) {
-	podSpec := &v1.Pod{}
-	if err := getSpecFromFile(path, podSpec); err != nil {
-		return nil, fmt.Errorf("parsing Pod: %v", err)
-	}
-	return podSpec, nil
-}
-
-func getPersistentVolumeSpecFromFile(path *string) (*v1.PersistentVolume, error) {
-	persistentVolumeSpec := &v1.PersistentVolume{}
-	if err := getSpecFromFile(path, persistentVolumeSpec); err != nil {
-		return nil, fmt.Errorf("parsing PersistentVolume: %v", err)
-	}
-	return persistentVolumeSpec, nil
-}
-
-func getPersistentVolumeClaimSpecFromFile(path *string) (*v1.PersistentVolumeClaim, error) {
-	persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
-	if err := getSpecFromFile(path, persistentVolumeClaimSpec); err != nil {
-		return nil, fmt.Errorf("parsing PersistentVolumeClaim: %v", err)
-	}
-	return persistentVolumeClaimSpec, nil
-}
+func runWorkload(b *testing.B, tc *testCase, w *workload) []DataItem {
+	// 30 minutes should be plenty enough even for the 5000-node tests.
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
+	defer cancel()
+	finalFunc, podInformer, clientset := mustSetupScheduler()
+	b.Cleanup(finalFunc)
+
+	var mu sync.Mutex
+	var dataItems []DataItem
+	numPodsScheduledPerNamespace := make(map[string]int)
+	nextNodeIndex := 0
+
+	for opIndex, op := range tc.WorkloadTemplate {
+		realOp, err := op.realOp.patchParams(w)
+		if err != nil {
+			b.Fatalf("op %d: %v", opIndex, err)
+		}
+		select {
+		case <-ctx.Done():
+			b.Fatalf("op %d: %v", opIndex, ctx.Err())
+		default:
+		}
+		switch concreteOp := realOp.(type) {
+		case *createNodesOp:
+			nodePreparer, err := getNodePreparer(fmt.Sprintf("node-%d-", opIndex), concreteOp, clientset)
+			if err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			if err := nodePreparer.PrepareNodes(nextNodeIndex); err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			b.Cleanup(func() {
+				nodePreparer.CleanupNodes()
+			})
+			nextNodeIndex += concreteOp.Count
+		case *createPodsOp:
+			var namespace string
+			if concreteOp.Namespace != nil {
+				namespace = *concreteOp.Namespace
+			} else {
+				namespace = fmt.Sprintf("namespace-%d", opIndex)
+			}
+			var collectors []testDataCollector
+			var collectorCtx context.Context
+			var collectorCancel func()
+			if concreteOp.CollectMetrics {
+				collectorCtx, collectorCancel = context.WithCancel(ctx)
+				defer collectorCancel()
+				collectors = getTestDataCollectors(podInformer, fmt.Sprintf("%s/%s", b.Name(), namespace), namespace, tc.MetricsCollectorConfig)
+				for _, collector := range collectors {
+					go collector.run(collectorCtx)
+				}
+			}
+			if err := createPods(namespace, concreteOp, clientset); err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			if concreteOp.SkipWaitToCompletion {
+				// Only record those namespaces that may potentially require barriers
+				// in the future.
+				if _, ok := numPodsScheduledPerNamespace[namespace]; ok {
+					numPodsScheduledPerNamespace[namespace] += concreteOp.Count
+				} else {
+					numPodsScheduledPerNamespace[namespace] = concreteOp.Count
+				}
+			} else {
+				if err := waitUntilPodsScheduledInNamespace(ctx, podInformer, b.Name(), namespace, concreteOp.Count); err != nil {
+					b.Fatalf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err)
+				}
+			}
+			if concreteOp.CollectMetrics {
+				// CollectMetrics and SkipWaitToCompletion can never be true at the
+				// same time, so if we're here, it means that all pods have been
+				// scheduled.
+				collectorCancel()
+				mu.Lock()
+				for _, collector := range collectors {
+					dataItems = append(dataItems, collector.collect()...)
+				}
+				mu.Unlock()
+			}
+		case *barrierOp:
+			for _, namespace := range concreteOp.Namespaces {
+				if _, ok := numPodsScheduledPerNamespace[namespace]; !ok {
+					b.Fatalf("op %d: unknown namespace %s", opIndex, namespace)
+				}
+			}
+			if err := waitUntilPodsScheduled(ctx, podInformer, b.Name(), concreteOp.Namespaces, numPodsScheduledPerNamespace); err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			// At the end of the barrier, we can be sure that there are no pods
+			// pending scheduling in the namespaces that we just blocked on.
+			if len(concreteOp.Namespaces) == 0 {
+				numPodsScheduledPerNamespace = make(map[string]int)
+			} else {
+				for _, namespace := range concreteOp.Namespaces {
+					delete(numPodsScheduledPerNamespace, namespace)
+				}
+			}
+		default:
+			b.Fatalf("op %d: invalid op %v", opIndex, concreteOp)
+		}
+	}
+	if err := waitUntilPodsScheduled(ctx, podInformer, b.Name(), nil, numPodsScheduledPerNamespace); err != nil {
+		// Any pending pods must be scheduled before this test can be considered to
+		// be complete.
+		b.Fatal(err)
+	}
+	return dataItems
+}
+
+type testDataCollector interface {
+	run(ctx context.Context)
+	collect() []DataItem
+}
+
+func getTestDataCollectors(podInformer coreinformers.PodInformer, name, namespace string, mcc *metricsCollectorConfig) []testDataCollector {
+	if mcc == nil {
+		mcc = &defaultMetricsCollectorConfig
+	}
+	return []testDataCollector{
+		newThroughputCollector(podInformer, map[string]string{"Name": name}, []string{namespace}),
+		newMetricsCollector(mcc, map[string]string{"Name": name}),
+	}
+}
+
+func getNodePreparer(prefix string, cno *createNodesOp, clientset clientset.Interface) (testutils.TestNodePreparer, error) {
+	var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
+	if cno.NodeAllocatableStrategy != nil {
+		nodeStrategy = cno.NodeAllocatableStrategy
+	} else if cno.LabelNodePrepareStrategy != nil {
+		nodeStrategy = cno.LabelNodePrepareStrategy
+	} else if cno.UniqueNodeLabelStrategy != nil {
+		nodeStrategy = cno.UniqueNodeLabelStrategy
+	}
+
+	if cno.NodeTemplatePath != nil {
+		node, err := getNodeSpecFromFile(cno.NodeTemplatePath)
+		if err != nil {
+			return nil, err
+		}
+		return framework.NewIntegrationTestNodePreparerWithNodeSpec(
+			clientset,
+			[]testutils.CountToStrategy{{Count: cno.Count, Strategy: nodeStrategy}},
+			node,
+		), nil
+	}
+	return framework.NewIntegrationTestNodePreparer(
+		clientset,
+		[]testutils.CountToStrategy{{Count: cno.Count, Strategy: nodeStrategy}},
+		prefix,
+	), nil
+}
+
+func createPods(namespace string, cpo *createPodsOp, clientset clientset.Interface) error {
+	strategy, err := getPodStrategy(cpo)
+	if err != nil {
+		return err
+	}
+	config := testutils.NewTestPodCreatorConfig()
+	config.AddStrategy(namespace, cpo.Count, strategy)
+	podCreator := testutils.NewTestPodCreator(clientset, config)
+	return podCreator.CreatePods()
+}
+
+// waitUntilPodsScheduledInNamespace blocks until all pods in the given
+// namespace are scheduled. Times out after 10 minutes because even at the
+// lowest observed QPS of ~10 pods/sec, a 5000-node test should complete.
+func waitUntilPodsScheduledInNamespace(ctx context.Context, podInformer coreinformers.PodInformer, name string, namespace string, wantCount int) error {
+	return wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
+		scheduled, err := getScheduledPods(podInformer, namespace)
+		if err != nil {
+			return false, err
+		}
+		if len(scheduled) >= wantCount {
+			return true, nil
+		}
+		klog.Infof("%s: namespace %s: got %d pods, want %d", name, namespace, len(scheduled), wantCount)
+		return false, nil
+	})
+}
+
+// waitUntilPodsScheduled blocks until the all pods in the given namespaces are
+// scheduled.
+func waitUntilPodsScheduled(ctx context.Context, podInformer coreinformers.PodInformer, name string, namespaces []string, numPodsScheduledPerNamespace map[string]int) error {
+	// If unspecified, default to all known namespaces.
+	if len(namespaces) == 0 {
+		for namespace := range numPodsScheduledPerNamespace {
+			namespaces = append(namespaces, namespace)
+		}
+	}
+	for _, namespace := range namespaces {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+		wantCount, ok := numPodsScheduledPerNamespace[namespace]
+		if !ok {
+			return fmt.Errorf("unknown namespace %s", namespace)
+		}
+		if err := waitUntilPodsScheduledInNamespace(ctx, podInformer, name, namespace, wantCount); err != nil {
+			return fmt.Errorf("error waiting for pods in namespace %q: %w", namespace, err)
+		}
+	}
+	return nil
+}
 
 func getSpecFromFile(path *string, spec interface{}) error {
@@ -342,7 +528,95 @@ func getSpecFromFile(path *string, spec interface{}) error {
 	if err != nil {
 		return err
 	}
-	return yaml.Unmarshal(bytes, spec)
+	return yaml.UnmarshalStrict(bytes, spec)
+}
+
+func getTestCases(path string) ([]*testCase, error) {
+	testCases := make([]*testCase, 0)
+	if err := getSpecFromFile(&path, &testCases); err != nil {
+		return nil, fmt.Errorf("parsing test cases: %w", err)
+	}
+	return testCases, nil
+}
+
+func validateTestCases(testCases []*testCase) error {
+	if len(testCases) == 0 {
+		return fmt.Errorf("no test cases defined")
+	}
+	for _, tc := range testCases {
+		if len(tc.Workloads) == 0 {
+			return fmt.Errorf("%s: no workloads defined", tc.Name)
+		}
+		if len(tc.WorkloadTemplate) == 0 {
+			return fmt.Errorf("%s: no ops defined", tc.Name)
+		}
+		// Make sure there's at least one CreatePods op with collectMetrics set to
+		// true in each workload. What's the point of running a performance
+		// benchmark if no statistics are collected for reporting?
+		if !tc.collectsMetrics() {
+			return fmt.Errorf("%s: no op in the workload template collects metrics", tc.Name)
+		}
+		// TODO(#93795): make sure each workload within a test case has a unique
+		// name? The name is used to identify the stats in benchmark reports.
+		// TODO(#94404): check for unused template parameters? Probably a typo.
+	}
+	return nil
+}
+
+func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
+	basePod := makeBasePod()
+	if cpo.PodTemplatePath != nil {
+		var err error
+		basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if cpo.PersistentVolumeClaimTemplatePath == nil {
+		return testutils.NewCustomCreatePodStrategy(basePod), nil
+	}
+	pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
+	if err != nil {
+		return nil, err
+	}
+	pvcTemplate, err := getPersistentVolumeClaimSpecFromFile(cpo.PersistentVolumeClaimTemplatePath)
+	if err != nil {
+		return nil, err
+	}
+	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
+}
+
+func getNodeSpecFromFile(path *string) (*v1.Node, error) {
+	nodeSpec := &v1.Node{}
+	if err := getSpecFromFile(path, nodeSpec); err != nil {
+		return nil, fmt.Errorf("parsing Node: %w", err)
+	}
+	return nodeSpec, nil
+}
+
+func getPodSpecFromFile(path *string) (*v1.Pod, error) {
+	podSpec := &v1.Pod{}
+	if err := getSpecFromFile(path, podSpec); err != nil {
+		return nil, fmt.Errorf("parsing Pod: %w", err)
+	}
+	return podSpec, nil
+}
+
+func getPersistentVolumeSpecFromFile(path *string) (*v1.PersistentVolume, error) {
+	persistentVolumeSpec := &v1.PersistentVolume{}
+	if err := getSpecFromFile(path, persistentVolumeSpec); err != nil {
+		return nil, fmt.Errorf("parsing PersistentVolume: %w", err)
+	}
+	return persistentVolumeSpec, nil
+}
+
+func getPersistentVolumeClaimSpecFromFile(path *string) (*v1.PersistentVolumeClaim, error) {
+	persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
+	if err := getSpecFromFile(path, persistentVolumeClaimSpec); err != nil {
+		return nil, fmt.Errorf("parsing PersistentVolumeClaim: %w", err)
+	}
+	return persistentVolumeClaimSpec, nil
 }
func getCustomVolumeFactory(pvTemplate *v1.PersistentVolume) func(id int) *v1.PersistentVolume { func getCustomVolumeFactory(pvTemplate *v1.PersistentVolume) func(id int) *v1.PersistentVolume {
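Illustration only (not part of this PR): the switch from yaml.Unmarshal to yaml.UnmarshalStrict means a misspelled key in the config file is reported instead of being silently dropped. The standalone sketch below assumes the yaml package is sigs.k8s.io/yaml (which is what the strict variant suggests) and uses a hypothetical workloadSpec type as a stand-in for the real config structs.

package main

import (
	"fmt"

	"sigs.k8s.io/yaml"
)

// workloadSpec is a stand-in for the benchmark's real workload type.
type workloadSpec struct {
	Name   string
	Params map[string]int
}

func main() {
	// "parms" is a typo for "params"; UnmarshalStrict reports it as an unknown
	// field instead of ignoring it, which is the point of switching to it.
	doc := []byte("name: 500Nodes\nparms:\n  initNodes: 500\n")
	var w workloadSpec
	if err := yaml.UnmarshalStrict(doc, &w); err != nil {
		fmt.Println("config rejected:", err)
	}
}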

View File

@@ -17,6 +17,7 @@ limitations under the License.
 package benchmark
 
 import (
+	"context"
 	"encoding/json"
 	"flag"
 	"fmt"
@@ -147,18 +148,18 @@ type metricsCollectorConfig struct {
 // metricsCollector collects metrics from legacyregistry.DefaultGatherer.Gather() endpoint.
 // Currently only Histrogram metrics are supported.
 type metricsCollector struct {
-	metricsCollectorConfig
+	*metricsCollectorConfig
 	labels map[string]string
 }
 
-func newMetricsCollector(config metricsCollectorConfig, labels map[string]string) *metricsCollector {
+func newMetricsCollector(config *metricsCollectorConfig, labels map[string]string) *metricsCollector {
 	return &metricsCollector{
 		metricsCollectorConfig: config,
 		labels:                 labels,
 	}
 }
 
-func (*metricsCollector) run(stopCh chan struct{}) {
+func (*metricsCollector) run(ctx context.Context) {
 	// metricCollector doesn't need to start before the tests, so nothing to do here.
 }
@@ -231,7 +232,7 @@ func newThroughputCollector(podInformer coreinformers.PodInformer, labels map[st
 	}
 }
 
-func (tc *throughputCollector) run(stopCh chan struct{}) {
+func (tc *throughputCollector) run(ctx context.Context) {
 	podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...)
 	if err != nil {
 		klog.Fatalf("%v", err)
@@ -239,8 +240,9 @@ func (tc *throughputCollector) run(stopCh chan struct{}) {
 	lastScheduledCount := len(podsScheduled)
 	for {
 		select {
-		case <-stopCh:
+		case <-ctx.Done():
 			return
+		// TODO(#94665): use time.Ticker instead
 		case <-time.After(throughputSampleFrequency):
 			podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...)
 			if err != nil {
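Illustration only (not part of this PR): the collectors now stop via context cancellation rather than a stop channel. The self-contained sketch below, with a hypothetical fakeCollector in place of the real throughput/metrics collectors, mirrors the pattern runWorkload uses: derive a collector context, cancel it once the measured pods are scheduled, then gather the results.

package main

import (
	"context"
	"fmt"
	"time"
)

// fakeCollector stands in for throughputCollector/metricsCollector: run()
// samples until its context is cancelled, collect() returns what was gathered.
type fakeCollector struct {
	samples []int
	done    chan struct{}
}

func (c *fakeCollector) run(ctx context.Context) {
	defer close(c.done)
	for i := 0; ; i++ {
		select {
		case <-ctx.Done():
			return
		case <-time.After(10 * time.Millisecond):
			c.samples = append(c.samples, i)
		}
	}
}

func (c *fakeCollector) collect() []int {
	<-c.done // wait for run() to observe cancellation before reading samples
	return c.samples
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	c := &fakeCollector{done: make(chan struct{})}
	go c.run(ctx)
	time.Sleep(50 * time.Millisecond) // stand-in for waiting until pods are scheduled
	cancel()
	fmt.Println("collected samples:", len(c.collect()))
}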

View File

@@ -931,7 +931,7 @@ type CountToStrategy struct {
 }
 
 type TestNodePreparer interface {
-	PrepareNodes() error
+	PrepareNodes(nextNodeIndex int) error
 	CleanupNodes() error
 }
} }