Merge pull request #93252 from adtac/scheduler-perf

scheduler_perf: refactor to allow arbitrary workloads
Authored by Kubernetes Prow Robot on 2020-09-17 14:08:46 -07:00; committed by GitHub
commit ff1d6e8c1d
8 changed files with 930 additions and 488 deletions

View File

@@ -58,7 +58,7 @@ func NewIntegrationTestNodePreparerWithNodeSpec(client clientset.Interface, coun
 }
 
 // PrepareNodes prepares countToStrategy test nodes.
-func (p *IntegrationTestNodePreparer) PrepareNodes() error {
+func (p *IntegrationTestNodePreparer) PrepareNodes(nextNodeIndex int) error {
 	numNodes := 0
 	for _, v := range p.countToStrategy {
 		numNodes += v.Count
@@ -103,11 +103,9 @@ func (p *IntegrationTestNodePreparer) PrepareNodes() error {
 	if err != nil {
 		klog.Fatalf("Error listing nodes: %v", err)
 	}
-	index := 0
-	sum := 0
+	index := nextNodeIndex
 	for _, v := range p.countToStrategy {
-		sum += v.Count
-		for ; index < sum; index++ {
+		for i := 0; i < v.Count; i, index = i+1, index+1 {
 			if err := testutils.DoPrepareNode(p.client, &nodes.Items[index], v.Strategy); err != nil {
 				klog.Errorf("Aborting node preparation: %v", err)
 				return err
@@ -119,14 +117,18 @@ func (p *IntegrationTestNodePreparer) PrepareNodes() error {
 // CleanupNodes deletes existing test nodes.
 func (p *IntegrationTestNodePreparer) CleanupNodes() error {
+	// TODO(#93794): make CleanupNodes only clean up the nodes created by this
+	// IntegrationTestNodePreparer to make this more intuitive.
 	nodes, err := GetReadySchedulableNodes(p.client)
 	if err != nil {
 		klog.Fatalf("Error listing nodes: %v", err)
 	}
+	var errRet error
 	for i := range nodes.Items {
 		if err := p.client.CoreV1().Nodes().Delete(context.TODO(), nodes.Items[i].Name, metav1.DeleteOptions{}); err != nil {
 			klog.Errorf("Error while deleting Node: %v", err)
+			errRet = err
 		}
 	}
-	return nil
+	return errRet
 }
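Illustration only (not part of this PR): PrepareNodes now takes a starting index so that several createNodes ops in one workload can prepare disjoint slices of the node list. The following standalone sketch, with a hypothetical countToStrategy type and string "nodes" instead of real Node objects, shows the same index arithmetic as the new loop and how the caller would chain successive ops.

package main

import "fmt"

// countToStrategy mirrors the shape used by the preparer: prepare Count nodes
// with a given strategy. The strategy is just a name here for brevity.
type countToStrategy struct {
	Count    int
	Strategy string
}

// prepareNodes walks the node list starting at nextNodeIndex, exactly like the
// updated loop `for i := 0; i < v.Count; i, index = i+1, index+1`, and returns
// the index one past the last node it touched so the next op can continue there.
func prepareNodes(nodes []string, nextNodeIndex int, strategies []countToStrategy) int {
	index := nextNodeIndex
	for _, v := range strategies {
		for i := 0; i < v.Count; i, index = i+1, index+1 {
			fmt.Printf("preparing %s with strategy %q\n", nodes[index], v.Strategy)
		}
	}
	return index
}

func main() {
	nodes := []string{"node-0", "node-1", "node-2", "node-3", "node-4"}
	// First createNodes op prepares nodes[0:3]; the second one starts where it left off.
	next := prepareNodes(nodes, 0, []countToStrategy{{Count: 3, Strategy: "trivial"}})
	prepareNodes(nodes, next, []countToStrategy{{Count: 2, Strategy: "labels"}})
}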

View File

@@ -0,0 +1 @@
+BenchmarkPerfScheduling_*.json

View File

@@ -48,6 +48,7 @@ go_test(
         "//staging/src/k8s.io/api/storage/v1beta1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
         "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",

View File

@@ -1,264 +1,426 @@
-- template:
-    desc: SchedulingBasic
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-default.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPodAntiAffinity
-    nodes:
-      uniqueNodeLabelStrategy:
-        labelKey: kubernetes.io/hostname
-    initPods:
-      - podTemplatePath: config/pod-with-pod-anti-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-pod-anti-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [100]
-      numPodsToSchedule: 400
-    - numNodes: 5000
-      numInitPods: [1000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingSecrets
-    initPods:
-      - podTemplatePath: config/pod-with-secret-volume.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-secret-volume.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingInTreePVs
-    initPods:
-      - persistentVolumeTemplatePath: config/pv-aws.yaml
-        persistentVolumeClaimTemplatePath: config/pvc.yaml
-    podsToSchedule:
-      persistentVolumeTemplatePath: config/pv-aws.yaml
-      persistentVolumeClaimTemplatePath: config/pvc.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingMigratedInTreePVs
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      nodeAllocatableStrategy:
-        nodeAllocatable:
-          attachable-volumes-csi-ebs.csi.aws.com: 39
-        csiNodeAllocatable:
-          ebs.csi.aws.com:
-            count: 39
-        migratedPlugins:
-          - "kubernetes.io/aws-ebs"
-    initPods:
-      - persistentVolumeTemplatePath: config/pv-aws.yaml
-        persistentVolumeClaimTemplatePath: config/pvc.yaml
-    podsToSchedule:
-      persistentVolumeTemplatePath: config/pv-aws.yaml
-      persistentVolumeClaimTemplatePath: config/pvc.yaml
-    featureGates:
-      CSIMigration: true
-      CSIMigrationAWS: true
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingCSIPVs
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      nodeAllocatableStrategy:
-        nodeAllocatable:
-          attachable-volumes-csi-ebs.csi.aws.com: 39
-        csiNodeAllocatable:
-          ebs.csi.aws.com:
-            count: 39
-    initPods:
-      - persistentVolumeTemplatePath: config/pv-csi.yaml
-        persistentVolumeClaimTemplatePath: config/pvc.yaml
-    podsToSchedule:
-      persistentVolumeTemplatePath: config/pv-csi.yaml
-      persistentVolumeClaimTemplatePath: config/pvc.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPodAffinity
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "failure-domain.beta.kubernetes.io/zone"
-        labelValues: ["zone1"]
-    initPods:
-      - podTemplatePath: config/pod-with-pod-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-pod-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPreferredPodAffinity
-    nodes:
-      uniqueNodeLabelStrategy:
-        labelKey: kubernetes.io/hostname
-    initPods:
-      - podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingPreferredPodAntiAffinity
-    nodes:
-      uniqueNodeLabelStrategy:
-        labelKey: kubernetes.io/hostname
-    initPods:
-      - podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: SchedulingNodeAffinity
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "failure-domain.beta.kubernetes.io/zone"
-        labelValues: ["zone1"]
-    initPods:
-      - podTemplatePath: config/pod-with-node-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-node-affinity.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [500]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 1000
-- template:
-    desc: TopologySpreading
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "topology.kubernetes.io/zone"
-        labelValues: ["moon-1", "moon-2", "moon-3"]
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-topology-spreading.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [1000]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 2000
-- template:
-    desc: PreferredTopologySpreading
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "topology.kubernetes.io/zone"
-        labelValues: ["moon-1", "moon-2", "moon-3"]
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-with-preferred-topology-spreading.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [1000]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [5000]
-      numPodsToSchedule: 2000
-- template:
-    desc: MixedSchedulingBasePod
-    nodes:
-      nodeTemplatePath: config/node-default.yaml
-      labelNodePrepareStrategy:
-        labelKey: "topology.kubernetes.io/zone"
-        labelValues: ["zone1"]
-    initPods:
-      - podTemplatePath: config/pod-default.yaml
-      - podTemplatePath: config/pod-with-pod-affinity.yaml
-      - podTemplatePath: config/pod-with-pod-anti-affinity.yaml
-      - podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
-      - podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-default.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [200, 200, 200, 200, 200]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [2000, 2000, 2000, 2000, 2000]
-      numPodsToSchedule: 1000
-- template:
-    desc: Preemption
-    initPods:
-      - podTemplatePath: config/pod-low-priority.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-high-priority.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [2000]
-      numPodsToSchedule: 500
-    - numNodes: 5000
-      numInitPods: [20000]
-      numPodsToSchedule: 5000
-- template:
-    desc: Unschedulable
-    skipWaitUntilInitPodsScheduled: true
-    initPods:
-      - podTemplatePath: config/pod-large-cpu.yaml
-    podsToSchedule:
-      podTemplatePath: config/pod-default.yaml
-  params:
-    - numNodes: 500
-      numInitPods: [200]
-      numPodsToSchedule: 1000
-    - numNodes: 5000
-      numInitPods: [200]
-      numPodsToSchedule: 5000
-    - numNodes: 5000
-      numInitPods: [2000]
-      numPodsToSchedule: 5000
+- name: SchedulingBasic
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-default.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 1000
+      measurePods: 1000
+- name: SchedulingPodAntiAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    uniqueNodeLabelStrategy:
+      labelKey: kubernetes.io/hostname
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
+    collectMetrics: true
+    namespace: sched-test
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 100
+      measurePods: 400
+  - name: 5000Nodes
+    params:
+      initNodes: 500
+      initPods: 100
+      measurePods: 400
+- name: SchedulingSecrets
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-secret-volume.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-secret-volume.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingInTreePVs
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingMigratedInTreePVs
+  featureGates:
+    CSIMigration: true
+    CSIMigrationAWS: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    nodeAllocatableStrategy:
+      nodeAllocatable:
+        attachable-volumes-csi-ebs.csi.aws.com: "39"
+      csiNodeAllocatable:
+        ebs.csi.aws.com:
+          count: 39
+      migratedPlugins:
+      - "kubernetes.io/aws-ebs"
+  - opcode: createPods
+    countParam: $initPods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    persistentVolumeTemplatePath: config/pv-aws.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingCSIPVs
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    nodeAllocatableStrategy:
+      nodeAllocatable:
+        attachable-volumes-csi-ebs.csi.aws.com: "39"
+      csiNodeAllocatable:
+        ebs.csi.aws.com:
+          count: 39
+  - opcode: createPods
+    countParam: $initPods
+    persistentVolumeTemplatePath: config/pv-csi.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    persistentVolumeTemplatePath: config/pv-csi.yaml
+    persistentVolumeClaimTemplatePath: config/pvc.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingPodAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "failure-domain.beta.kubernetes.io/zone"
+      labelValues: ["zone1"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-pod-affinity.yaml
+    namespace: sched-test
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingPreferredPodAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    uniqueNodeLabelStrategy:
+      labelKey: kubernetes.io/hostname
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
+    namespace: sched-test
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingPreferredPodAntiAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    uniqueNodeLabelStrategy:
+      labelKey: kubernetes.io/hostname
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
+    namespace: sched-test
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: SchedulingNodeAffinity
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "failure-domain.beta.kubernetes.io/zone"
+      labelValues: ["zone1"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-node-affinity.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-node-affinity.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 500
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 1000
+- name: TopologySpreading
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "topology.kubernetes.io/zone"
+      labelValues: ["moon-1", "moon-2", "moon-3"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-topology-spreading.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 1000
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 2000
+- name: PreferredTopologySpreading
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "topology.kubernetes.io/zone"
+      labelValues: ["moon-1", "moon-2", "moon-3"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-with-preferred-topology-spreading.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 1000
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 5000
+      measurePods: 2000
+- name: MixedSchedulingBasePod
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: config/node-default.yaml
+    labelNodePrepareStrategy:
+      labelKey: "topology.kubernetes.io/zone"
+      labelValues: ["zone1"]
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-default.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-with-preferred-pod-anti-affinity.yaml
+    namespace: sched-setup
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-default.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 200
+      measurePods: 1000
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 2000
+      measurePods: 1000
+- name: Preemption
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-low-priority.yaml
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-high-priority.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes
+    params:
+      initNodes: 500
+      initPods: 2000
+      measurePods: 500
+  - name: 5000Nodes
+    params:
+      initNodes: 5000
+      initPods: 20000
+      measurePods: 5000
+- name: Unschedulable
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+  - opcode: createPods
+    countParam: $initPods
+    podTemplatePath: config/pod-large-cpu.yaml
+    skipWaitToCompletion: true
+  - opcode: createPods
+    countParam: $measurePods
+    podTemplatePath: config/pod-default.yaml
+    collectMetrics: true
+  workloads:
+  - name: 500Nodes/200InitPods
+    params:
+      initNodes: 500
+      initPods: 200
+      measurePods: 1000
+  - name: 5000Nodes/200InitPods
+    params:
+      initNodes: 5000
+      initPods: 200
+      measurePods: 5000
+  - name: 5000Nodes/2000InitPods
+    params:
+      initNodes: 5000
+      initPods: 2000
+      measurePods: 5000
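Illustration only (not part of this PR): in the new config, `countParam` values such as `$initNodes` are resolved per workload by stripping the leading `$` and looking the name up in that workload's `params` map (this is what the `patchParams` methods further down in this diff do). The standalone Go sketch below, with a hypothetical `resolveCountParam` helper, shows only that substitution rule.

package main

import (
	"fmt"
	"strings"
)

// resolveCountParam mimics the substitution rule of the new config format: a
// countParam is valid only if it starts with "$", and it resolves to the value
// stored under the key with the "$" stripped (e.g. "$initNodes" -> params["initNodes"]).
func resolveCountParam(countParam string, params map[string]int) (int, error) {
	if !strings.HasPrefix(countParam, "$") {
		return 0, fmt.Errorf("%q is not parameterizable", countParam)
	}
	count, ok := params[countParam[1:]]
	if !ok {
		return 0, fmt.Errorf("parameter %s is undefined", countParam)
	}
	return count, nil
}

func main() {
	// Params of the 500Nodes workload of SchedulingBasic above.
	params := map[string]int{"initNodes": 500, "initPods": 500, "measurePods": 1000}
	n, err := resolveCountParam("$initNodes", params)
	fmt.Println(n, err) // 500 <nil>
}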

View File

@@ -448,7 +448,7 @@ func benchmarkScheduling(numExistingPods, minPods int,
 		clientset,
 		nodeStrategies,
 		"scheduler-perf-")
-	if err := nodePreparer.PrepareNodes(); err != nil {
+	if err := nodePreparer.PrepareNodes(0); err != nil {
 		klog.Fatalf("%v", err)
 	}
 	defer nodePreparer.CleanupNodes()

View File

@@ -17,12 +17,17 @@ limitations under the License.
 package benchmark
 
 import (
+	"context"
+	"encoding/json"
 	"fmt"
 	"io/ioutil"
+	"strings"
+	"sync"
 	"testing"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
@@ -35,7 +40,10 @@ import (
 )
 
 const (
 	configFile = "config/performance-config.yaml"
+
+	createNodesOpcode = "createNodes"
+	createPodsOpcode  = "createPods"
+	barrierOpcode     = "barrier"
 )
 
 var (
@@ -51,87 +59,248 @@ var (
 	}
 )
 
-// testCase configures a test case to run the scheduler performance test. Users should be able to
-// provide this via a YAML file.
-//
-// It specifies nodes and pods in the cluster before running the test. It also specifies the pods to
-// schedule during the test. The config can be as simple as just specify number of nodes/pods, where
-// default spec will be applied. It also allows the user to specify a pod spec template for more
-// complicated test cases.
-//
-// It also specifies the metrics to be collected after the test. If nothing is specified, default metrics
-// such as scheduling throughput and latencies will be collected.
-type testCase struct {
-	// description of the test case
-	Desc string
-	// configures nodes in the cluster
-	Nodes nodeCase
-	// configures pods in the cluster before running the tests
-	InitPods []podCase
-	// configures the test to now wait for init pods to schedule before creating
-	// test pods.
-	SkipWaitUntilInitPodsScheduled bool
-	// pods to be scheduled during the test.
-	PodsToSchedule podCase
-	// optional, feature gates to set before running the test
-	FeatureGates map[featuregate.Feature]bool
-	// optional, replaces default defaultMetricsCollectorConfig if supplied.
-	MetricsCollectorConfig *metricsCollectorConfig
-}
-
-type nodeCase struct {
-	Num              int
-	NodeTemplatePath *string
-	// At most one of the following strategies can be defined. If not specified, default to TrivialNodePrepareStrategy.
-	NodeAllocatableStrategy  *testutils.NodeAllocatableStrategy
-	LabelNodePrepareStrategy *testutils.LabelNodePrepareStrategy
-	UniqueNodeLabelStrategy  *testutils.UniqueNodeLabelStrategy
-}
-
-type podCase struct {
-	Num                               int
-	PodTemplatePath                   *string
-	PersistentVolumeTemplatePath      *string
-	PersistentVolumeClaimTemplatePath *string
-}
-
-// simpleTestCases defines a set of test cases that share the same template (node spec, pod spec, etc)
-// with testParams(e.g., NumNodes) being overridden. This provides a convenient way to define multiple tests
-// with various sizes.
-type simpleTestCases struct {
-	Template testCase
-	Params   []testParams
-}
-
-type testParams struct {
-	NumNodes          int
-	NumInitPods       []int
-	NumPodsToSchedule int
-}
-
-type testDataCollector interface {
-	run(stopCh chan struct{})
-	collect() []DataItem
-}
+// testCase defines a set of test cases that intend to test the performance of
+// similar workloads of varying sizes with shared overall settings such as
+// feature gates and metrics collected.
+type testCase struct {
+	// Name of the testCase.
+	Name string
+	// Feature gates to set before running the test. Optional.
+	FeatureGates map[featuregate.Feature]bool
+	// List of metrics to collect. Optional, defaults to
+	// defaultMetricsCollectorConfig if unspecified.
+	MetricsCollectorConfig *metricsCollectorConfig
+	// Template for sequence of ops that each workload must follow. Each op will
+	// be executed serially one after another. Each element of the list must be
+	// createNodesOp, createPodsOp, or barrierOp.
+	WorkloadTemplate []op
+	// List of workloads to run under this testCase.
+	Workloads []*workload
+	// TODO(#93792): reduce config toil by having a default pod and node spec per
+	// testCase? CreatePods and CreateNodes ops will inherit these unless
+	// manually overridden.
+}
+
+func (tc *testCase) collectsMetrics() bool {
+	for _, op := range tc.WorkloadTemplate {
+		if op.realOp.collectsMetrics() {
+			return true
+		}
+	}
+	return false
+}
+
+// workload is a subtest under a testCase that tests the scheduler performance
+// for a certain ordering of ops. The set of nodes created and pods scheduled
+// in a workload may be heterogenous.
+type workload struct {
+	// Name of the workload.
+	Name string
+	// Values of parameters used in the workloadTemplate.
+	Params map[string]int
+}
+
+// op is a dummy struct which stores the real op in itself.
+type op struct {
+	realOp realOp
+}
+
+// UnmarshalJSON is a custom unmarshaler for the op struct since we don't know
+// which op we're decoding at runtime.
+func (op *op) UnmarshalJSON(b []byte) error {
+	possibleOps := []realOp{
+		&createNodesOp{},
+		&createPodsOp{},
+		&barrierOp{},
+		// TODO(#93793): add a sleep timer op to simulate waiting?
+		// TODO(#94601): add a delete nodes op to simulate scaling behaviour?
+	}
+	var firstError error
+	for _, possibleOp := range possibleOps {
+		if err := json.Unmarshal(b, possibleOp); err == nil {
+			if err2 := possibleOp.isValid(true); err2 == nil {
+				op.realOp = possibleOp
+				return nil
+			} else if firstError == nil {
+				// Don't return an error yet. Even though this op is invalid, it may
+				// still match other possible ops.
+				firstError = err2
+			}
+		}
+	}
+	return fmt.Errorf("cannot unmarshal %s into any known op type: %w", string(b), firstError)
+}
+
+// realOp is an interface that is implemented by different structs. To evaluate
+// the validity of ops at parse-time, a isValid function must be implemented.
+type realOp interface {
+	// isValid verifies the validity of the op args such as node/pod count. Note
+	// that we don't catch undefined parameters at this stage.
+	isValid(allowParameterization bool) error
+	// collectsMetrics checks if the op collects metrics.
+	collectsMetrics() bool
+	// patchParams returns a patched realOp of the same type after substituting
+	// parameterizable values with workload-specific values. One should implement
+	// this method on the value receiver base type, not a pointer receiver base
+	// type, even though calls will be made from with a *realOp. This is because
+	// callers don't want the receiver to inadvertently modify the realOp
+	// (instead, it's returned as a return value).
+	patchParams(w *workload) (realOp, error)
+}
+
+func isValidParameterizable(val string) bool {
+	return strings.HasPrefix(val, "$")
+}
+
+// createNodesOp defines an op where nodes are created as a part of a workload.
+type createNodesOp struct {
+	// Must be "createNodes".
+	Opcode string
+	// Number of nodes to create. Parameterizable through CountParam.
+	Count int
+	// Template parameter for Count.
+	CountParam string
+	// Path to spec file describing the nodes to create. Optional.
+	NodeTemplatePath *string
+	// At most one of the following strategies can be defined. Optional, defaults
+	// to TrivialNodePrepareStrategy if unspecified.
+	NodeAllocatableStrategy  *testutils.NodeAllocatableStrategy
+	LabelNodePrepareStrategy *testutils.LabelNodePrepareStrategy
+	UniqueNodeLabelStrategy  *testutils.UniqueNodeLabelStrategy
+}
+
+func (cno *createNodesOp) isValid(allowParameterization bool) error {
+	if cno.Opcode != createNodesOpcode {
+		return fmt.Errorf("invalid opcode")
+	}
+	ok := (cno.Count > 0 ||
+		(cno.CountParam != "" && allowParameterization && isValidParameterizable(cno.CountParam)))
+	if !ok {
+		return fmt.Errorf("invalid Count=%d / CountParam=%q", cno.Count, cno.CountParam)
+	}
+	return nil
+}
+
+func (*createNodesOp) collectsMetrics() bool {
+	return false
+}
+
+func (cno createNodesOp) patchParams(w *workload) (realOp, error) {
+	if cno.CountParam != "" {
+		var ok bool
+		if cno.Count, ok = w.Params[cno.CountParam[1:]]; !ok {
+			return nil, fmt.Errorf("parameter %s is undefined", cno.CountParam)
+		}
+	}
+	return &cno, (&cno).isValid(false)
+}
+
+// createPodsOp defines an op where pods are scheduled as a part of a workload.
+// The test can block on the completion of this op before moving forward or
+// continue asynchronously.
+type createPodsOp struct {
+	// Must be "createPods".
+	Opcode string
+	// Number of pods to schedule. Parameterizable through CountParam.
+	Count int
+	// Template parameter for Count.
+	CountParam string
+	// Whether or not to enable metrics collection for this createPodsOp.
+	// Optional. Both CollectMetrics and SkipWaitToCompletion cannot be true at
+	// the same time for a particular createPodsOp.
+	CollectMetrics bool
+	// Namespace the pods should be created in. Optional, defaults to a unique
+	// namespace of the format "namespace-<number>".
+	Namespace *string
+	// Path to spec file describing the pods to schedule. Optional.
+	PodTemplatePath *string
+	// Whether or not to wait for all pods in this op to get scheduled. Optional,
+	// defaults to false.
+	SkipWaitToCompletion bool
+	// Persistent volume settings for the pods to be scheduled. Optional.
+	PersistentVolumeTemplatePath      *string
+	PersistentVolumeClaimTemplatePath *string
+}
+
+func (cpo *createPodsOp) isValid(allowParameterization bool) error {
+	if cpo.Opcode != createPodsOpcode {
+		return fmt.Errorf("invalid opcode")
+	}
+	ok := (cpo.Count > 0 ||
+		(cpo.CountParam != "" && allowParameterization && isValidParameterizable(cpo.CountParam)))
+	if !ok {
+		return fmt.Errorf("invalid Count=%d / CountParam=%q", cpo.Count, cpo.CountParam)
+	}
+	if cpo.CollectMetrics && cpo.SkipWaitToCompletion {
+		// While it's technically possible to achieve this, the additional
+		// complexity is not worth it, especially given that we don't have any
+		// use-cases right now.
+		return fmt.Errorf("collectMetrics and skipWaitToCompletion cannot be true at the same time")
+	}
+	return nil
+}
+
+func (cpo *createPodsOp) collectsMetrics() bool {
+	return cpo.CollectMetrics
+}
+
+func (cpo createPodsOp) patchParams(w *workload) (realOp, error) {
+	if cpo.CountParam != "" {
+		var ok bool
+		if cpo.Count, ok = w.Params[cpo.CountParam[1:]]; !ok {
+			return nil, fmt.Errorf("parameter %s is undefined", cpo.CountParam)
+		}
+	}
+	return &cpo, (&cpo).isValid(false)
+}
+
+// barrierOp defines an op that can be used to wait until all scheduled pods of
+// one or many namespaces have been bound to nodes. This is useful when pods
+// were scheduled with SkipWaitToCompletion set to true. A barrierOp is added
+// at the end of each each workload automatically.
+type barrierOp struct {
+	// Must be "barrier".
+	Opcode string
+	// Namespaces to block on. Empty array or not specifying this field signifies
+	// that the barrier should block on all namespaces.
+	Namespaces []string
+}
+
+func (bo *barrierOp) isValid(allowParameterization bool) error {
+	if bo.Opcode != barrierOpcode {
+		return fmt.Errorf("invalid opcode")
+	}
+	return nil
+}
+
+func (*barrierOp) collectsMetrics() bool {
+	return false
+}
+
+func (bo barrierOp) patchParams(w *workload) (realOp, error) {
+	return &bo, nil
+}
 
 func BenchmarkPerfScheduling(b *testing.B) {
-	dataItems := DataItems{Version: "v1"}
-	tests, err := parseTestCases(configFile)
+	testCases, err := getTestCases(configFile)
 	if err != nil {
 		b.Fatal(err)
 	}
+	if err = validateTestCases(testCases); err != nil {
+		b.Fatal(err)
+	}
 
-	for _, test := range tests {
-		initPods := 0
-		for _, p := range test.InitPods {
-			initPods += p.Num
-		}
-		name := fmt.Sprintf("%v/%vNodes/%vInitPods/%vPodsToSchedule", test.Desc, test.Nodes.Num, initPods, test.PodsToSchedule.Num)
-		b.Run(name, func(b *testing.B) {
-			for feature, flag := range test.FeatureGates {
-				defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
-			}
-			dataItems.DataItems = append(dataItems.DataItems, perfScheduling(test, b)...)
+	dataItems := DataItems{Version: "v1"}
+	for _, tc := range testCases {
+		b.Run(tc.Name, func(b *testing.B) {
+			for _, w := range tc.Workloads {
+				b.Run(w.Name, func(b *testing.B) {
+					for feature, flag := range tc.FeatureGates {
+						defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
+					}
+					dataItems.DataItems = append(dataItems.DataItems, runWorkload(b, tc, w)...)
+				})
+			}
 		})
 	}
 	if err := dataItems2JSONFile(dataItems, b.Name()); err != nil {
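Illustration only (not part of this PR): the sketch below shows how a single op from the YAML config would be decoded and specialized for one workload, using the `op`, `workload` and `createPodsOp` types introduced above. Because those types are unexported, this snippet is assumed to live inside the benchmark package; the function name is hypothetical.

func exampleOpPatching() error {
	raw := []byte(`{"opcode": "createPods", "countParam": "$measurePods", "collectMetrics": true}`)
	var o op
	// op.UnmarshalJSON tries createNodesOp, createPodsOp and barrierOp in turn
	// and keeps the first candidate that passes isValid(true).
	if err := json.Unmarshal(raw, &o); err != nil {
		return err
	}
	// Substitute $measurePods with the workload-specific value.
	w := &workload{Name: "500Nodes", Params: map[string]int{"measurePods": 1000}}
	realOp, err := o.realOp.patchParams(w)
	if err != nil {
		return err
	}
	cpo := realOp.(*createPodsOp)
	fmt.Printf("will create %d pods, collectMetrics=%v\n", cpo.Count, cpo.CollectMetrics)
	return nil
}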
@@ -139,202 +308,219 @@ func BenchmarkPerfScheduling(b *testing.B) {
 	}
 }
 
-func perfScheduling(test testCase, b *testing.B) []DataItem {
-	finalFunc, podInformer, clientset := mustSetupScheduler()
-	defer finalFunc()
-
-	nodePreparer, err := getNodePreparer(test.Nodes, clientset)
-	if err != nil {
-		b.Fatal(err)
-	}
-	if err := nodePreparer.PrepareNodes(); err != nil {
-		b.Fatal(err)
-	}
-	defer nodePreparer.CleanupNodes()
-
-	total := 0
-	for _, p := range test.InitPods {
-		if err := createPods(setupNamespace, p, clientset); err != nil {
-			b.Fatal(err)
-		}
-		total += p.Num
-	}
-	if !test.SkipWaitUntilInitPodsScheduled {
-		if err := waitNumPodsScheduled(b, total, podInformer, setupNamespace); err != nil {
-			b.Fatal(err)
-		}
-	}
-
-	// start benchmark
-	b.ResetTimer()
-
-	// Start test data collectors.
-	stopCh := make(chan struct{})
-	collectors := getTestDataCollectors(test, podInformer, b)
-	for _, collector := range collectors {
-		go collector.run(stopCh)
-	}
-
-	// Schedule the main workload
-	if err := createPods(testNamespace, test.PodsToSchedule, clientset); err != nil {
-		b.Fatal(err)
-	}
-	if err := waitNumPodsScheduled(b, test.PodsToSchedule.Num, podInformer, testNamespace); err != nil {
-		b.Fatal(err)
-	}
-
-	close(stopCh)
-	// Note: without this line we're taking the overhead of defer() into account.
-	b.StopTimer()
-
-	var dataItems []DataItem
-	for _, collector := range collectors {
-		dataItems = append(dataItems, collector.collect()...)
-	}
-	return dataItems
-}
-
-func waitNumPodsScheduled(b *testing.B, num int, podInformer coreinformers.PodInformer, namespace string) error {
-	for {
-		scheduled, err := getScheduledPods(podInformer, namespace)
-		if err != nil {
-			return err
-		}
-		if len(scheduled) >= num {
-			break
-		}
-		klog.Infof("%s: got %d existing pods, required: %d", b.Name(), len(scheduled), num)
-		time.Sleep(1 * time.Second)
-	}
-	return nil
-}
-
-func getTestDataCollectors(tc testCase, podInformer coreinformers.PodInformer, b *testing.B) []testDataCollector {
-	collectors := []testDataCollector{newThroughputCollector(podInformer, map[string]string{"Name": b.Name()}, []string{testNamespace})}
-	metricsCollectorConfig := defaultMetricsCollectorConfig
-	if tc.MetricsCollectorConfig != nil {
-		metricsCollectorConfig = *tc.MetricsCollectorConfig
-	}
-	collectors = append(collectors, newMetricsCollector(metricsCollectorConfig, map[string]string{"Name": b.Name()}))
-	return collectors
-}
-
-func getNodePreparer(nc nodeCase, clientset clientset.Interface) (testutils.TestNodePreparer, error) {
-	var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
-	if nc.NodeAllocatableStrategy != nil {
-		nodeStrategy = nc.NodeAllocatableStrategy
-	} else if nc.LabelNodePrepareStrategy != nil {
-		nodeStrategy = nc.LabelNodePrepareStrategy
-	} else if nc.UniqueNodeLabelStrategy != nil {
-		nodeStrategy = nc.UniqueNodeLabelStrategy
-	}
-
-	if nc.NodeTemplatePath != nil {
-		node, err := getNodeSpecFromFile(nc.NodeTemplatePath)
-		if err != nil {
-			return nil, err
-		}
-		return framework.NewIntegrationTestNodePreparerWithNodeSpec(
-			clientset,
-			[]testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
-			node,
-		), nil
-	}
-	return framework.NewIntegrationTestNodePreparer(
-		clientset,
-		[]testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
-		"scheduler-perf-",
-	), nil
-}
-
-func createPods(ns string, pc podCase, clientset clientset.Interface) error {
-	strategy, err := getPodStrategy(pc)
-	if err != nil {
-		return err
-	}
-	config := testutils.NewTestPodCreatorConfig()
-	config.AddStrategy(ns, pc.Num, strategy)
-	podCreator := testutils.NewTestPodCreator(clientset, config)
-	return podCreator.CreatePods()
-}
-
-func getPodStrategy(pc podCase) (testutils.TestPodCreateStrategy, error) {
-	basePod := makeBasePod()
-	if pc.PodTemplatePath != nil {
-		var err error
-		basePod, err = getPodSpecFromFile(pc.PodTemplatePath)
-		if err != nil {
-			return nil, err
-		}
-	}
-	if pc.PersistentVolumeClaimTemplatePath == nil {
-		return testutils.NewCustomCreatePodStrategy(basePod), nil
-	}
-
-	pvTemplate, err := getPersistentVolumeSpecFromFile(pc.PersistentVolumeTemplatePath)
-	if err != nil {
-		return nil, err
-	}
-	pvcTemplate, err := getPersistentVolumeClaimSpecFromFile(pc.PersistentVolumeClaimTemplatePath)
-	if err != nil {
-		return nil, err
-	}
-	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
-}
-
-func parseTestCases(path string) ([]testCase, error) {
-	var simpleTests []simpleTestCases
-	if err := getSpecFromFile(&path, &simpleTests); err != nil {
-		return nil, fmt.Errorf("parsing test cases: %v", err)
-	}
-
-	testCases := make([]testCase, 0)
-	for _, s := range simpleTests {
-		testCase := s.Template
-		for _, p := range s.Params {
-			testCase.Nodes.Num = p.NumNodes
-			testCase.InitPods = append([]podCase(nil), testCase.InitPods...)
-			for i, v := range p.NumInitPods {
-				testCase.InitPods[i].Num = v
-			}
-			testCase.PodsToSchedule.Num = p.NumPodsToSchedule
-			testCases = append(testCases, testCase)
-		}
-	}
-
-	return testCases, nil
-}
-
-func getNodeSpecFromFile(path *string) (*v1.Node, error) {
-	nodeSpec := &v1.Node{}
-	if err := getSpecFromFile(path, nodeSpec); err != nil {
-		return nil, fmt.Errorf("parsing Node: %v", err)
-	}
-	return nodeSpec, nil
-}
-
-func getPodSpecFromFile(path *string) (*v1.Pod, error) {
-	podSpec := &v1.Pod{}
-	if err := getSpecFromFile(path, podSpec); err != nil {
-		return nil, fmt.Errorf("parsing Pod: %v", err)
-	}
-	return podSpec, nil
-}
-
-func getPersistentVolumeSpecFromFile(path *string) (*v1.PersistentVolume, error) {
-	persistentVolumeSpec := &v1.PersistentVolume{}
-	if err := getSpecFromFile(path, persistentVolumeSpec); err != nil {
-		return nil, fmt.Errorf("parsing PersistentVolume: %v", err)
-	}
-	return persistentVolumeSpec, nil
-}
-
-func getPersistentVolumeClaimSpecFromFile(path *string) (*v1.PersistentVolumeClaim, error) {
-	persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
-	if err := getSpecFromFile(path, persistentVolumeClaimSpec); err != nil {
-		return nil, fmt.Errorf("parsing PersistentVolumeClaim: %v", err)
-	}
-	return persistentVolumeClaimSpec, nil
-}
+func runWorkload(b *testing.B, tc *testCase, w *workload) []DataItem {
+	// 30 minutes should be plenty enough even for the 5000-node tests.
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
+	defer cancel()
+	finalFunc, podInformer, clientset := mustSetupScheduler()
+	b.Cleanup(finalFunc)
+
+	var mu sync.Mutex
+	var dataItems []DataItem
+	numPodsScheduledPerNamespace := make(map[string]int)
+	nextNodeIndex := 0
+
+	for opIndex, op := range tc.WorkloadTemplate {
+		realOp, err := op.realOp.patchParams(w)
+		if err != nil {
+			b.Fatalf("op %d: %v", opIndex, err)
+		}
+		select {
+		case <-ctx.Done():
+			b.Fatalf("op %d: %v", opIndex, ctx.Err())
+		default:
+		}
+		switch concreteOp := realOp.(type) {
+		case *createNodesOp:
+			nodePreparer, err := getNodePreparer(fmt.Sprintf("node-%d-", opIndex), concreteOp, clientset)
+			if err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			if err := nodePreparer.PrepareNodes(nextNodeIndex); err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			b.Cleanup(func() {
+				nodePreparer.CleanupNodes()
+			})
+			nextNodeIndex += concreteOp.Count
+		case *createPodsOp:
+			var namespace string
+			if concreteOp.Namespace != nil {
+				namespace = *concreteOp.Namespace
+			} else {
+				namespace = fmt.Sprintf("namespace-%d", opIndex)
+			}
+			var collectors []testDataCollector
+			var collectorCtx context.Context
+			var collectorCancel func()
+			if concreteOp.CollectMetrics {
+				collectorCtx, collectorCancel = context.WithCancel(ctx)
+				defer collectorCancel()
+				collectors = getTestDataCollectors(podInformer, fmt.Sprintf("%s/%s", b.Name(), namespace), namespace, tc.MetricsCollectorConfig)
+				for _, collector := range collectors {
+					go collector.run(collectorCtx)
+				}
+			}
+			if err := createPods(namespace, concreteOp, clientset); err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			if concreteOp.SkipWaitToCompletion {
+				// Only record those namespaces that may potentially require barriers
+				// in the future.
+				if _, ok := numPodsScheduledPerNamespace[namespace]; ok {
+					numPodsScheduledPerNamespace[namespace] += concreteOp.Count
+				} else {
+					numPodsScheduledPerNamespace[namespace] = concreteOp.Count
+				}
+			} else {
+				if err := waitUntilPodsScheduledInNamespace(ctx, podInformer, b.Name(), namespace, concreteOp.Count); err != nil {
+					b.Fatalf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err)
+				}
+			}
+			if concreteOp.CollectMetrics {
+				// CollectMetrics and SkipWaitToCompletion can never be true at the
+				// same time, so if we're here, it means that all pods have been
+				// scheduled.
+				collectorCancel()
+				mu.Lock()
+				for _, collector := range collectors {
+					dataItems = append(dataItems, collector.collect()...)
+				}
+				mu.Unlock()
+			}
+		case *barrierOp:
+			for _, namespace := range concreteOp.Namespaces {
+				if _, ok := numPodsScheduledPerNamespace[namespace]; !ok {
+					b.Fatalf("op %d: unknown namespace %s", opIndex, namespace)
+				}
+			}
+			if err := waitUntilPodsScheduled(ctx, podInformer, b.Name(), concreteOp.Namespaces, numPodsScheduledPerNamespace); err != nil {
+				b.Fatalf("op %d: %v", opIndex, err)
+			}
+			// At the end of the barrier, we can be sure that there are no pods
+			// pending scheduling in the namespaces that we just blocked on.
+			if len(concreteOp.Namespaces) == 0 {
+				numPodsScheduledPerNamespace = make(map[string]int)
+			} else {
+				for _, namespace := range concreteOp.Namespaces {
+					delete(numPodsScheduledPerNamespace, namespace)
+				}
+			}
+		default:
+			b.Fatalf("op %d: invalid op %v", opIndex, concreteOp)
+		}
+	}
+	if err := waitUntilPodsScheduled(ctx, podInformer, b.Name(), nil, numPodsScheduledPerNamespace); err != nil {
+		// Any pending pods must be scheduled before this test can be considered to
+		// be complete.
+		b.Fatal(err)
+	}
+	return dataItems
+}
+
+type testDataCollector interface {
+	run(ctx context.Context)
+	collect() []DataItem
+}
+
+func getTestDataCollectors(podInformer coreinformers.PodInformer, name, namespace string, mcc *metricsCollectorConfig) []testDataCollector {
+	if mcc == nil {
+		mcc = &defaultMetricsCollectorConfig
+	}
+	return []testDataCollector{
+		newThroughputCollector(podInformer, map[string]string{"Name": name}, []string{namespace}),
+		newMetricsCollector(mcc, map[string]string{"Name": name}),
+	}
+}
+
+func getNodePreparer(prefix string, cno *createNodesOp, clientset clientset.Interface) (testutils.TestNodePreparer, error) {
+	var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
+	if cno.NodeAllocatableStrategy != nil {
+		nodeStrategy = cno.NodeAllocatableStrategy
+	} else if cno.LabelNodePrepareStrategy != nil {
+		nodeStrategy = cno.LabelNodePrepareStrategy
+	} else if cno.UniqueNodeLabelStrategy != nil {
+		nodeStrategy = cno.UniqueNodeLabelStrategy
+	}
+
+	if cno.NodeTemplatePath != nil {
+		node, err := getNodeSpecFromFile(cno.NodeTemplatePath)
+		if err != nil {
+			return nil, err
+		}
+		return framework.NewIntegrationTestNodePreparerWithNodeSpec(
+			clientset,
+			[]testutils.CountToStrategy{{Count: cno.Count, Strategy: nodeStrategy}},
+			node,
+		), nil
+	}
+	return framework.NewIntegrationTestNodePreparer(
+		clientset,
+		[]testutils.CountToStrategy{{Count: cno.Count, Strategy: nodeStrategy}},
+		prefix,
+	), nil
+}
+
+func createPods(namespace string, cpo *createPodsOp, clientset clientset.Interface) error {
+	strategy, err := getPodStrategy(cpo)
+	if err != nil {
+		return err
+	}
+	config := testutils.NewTestPodCreatorConfig()
+	config.AddStrategy(namespace, cpo.Count, strategy)
+	podCreator := testutils.NewTestPodCreator(clientset, config)
+	return podCreator.CreatePods()
+}
+
+// waitUntilPodsScheduledInNamespace blocks until all pods in the given
+// namespace are scheduled. Times out after 10 minutes because even at the
+// lowest observed QPS of ~10 pods/sec, a 5000-node test should complete.
+func waitUntilPodsScheduledInNamespace(ctx context.Context, podInformer coreinformers.PodInformer, name string, namespace string, wantCount int) error {
+	return wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
+		scheduled, err := getScheduledPods(podInformer, namespace)
+		if err != nil {
+			return false, err
+		}
+		if len(scheduled) >= wantCount {
+			return true, nil
+		}
+		klog.Infof("%s: namespace %s: got %d pods, want %d", name, namespace, len(scheduled), wantCount)
+		return false, nil
+	})
+}
+
+// waitUntilPodsScheduled blocks until the all pods in the given namespaces are
+// scheduled.
+func waitUntilPodsScheduled(ctx context.Context, podInformer coreinformers.PodInformer, name string, namespaces []string, numPodsScheduledPerNamespace map[string]int) error {
+	// If unspecified, default to all known namespaces.
+	if len(namespaces) == 0 {
+		for namespace := range numPodsScheduledPerNamespace {
+			namespaces = append(namespaces, namespace)
+		}
+	}
+	for _, namespace := range namespaces {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+		wantCount, ok := numPodsScheduledPerNamespace[namespace]
+		if !ok {
+			return fmt.Errorf("unknown namespace %s", namespace)
+		}
+		if err := waitUntilPodsScheduledInNamespace(ctx, podInformer, name, namespace, wantCount); err != nil {
+			return fmt.Errorf("error waiting for pods in namespace %q: %w", namespace, err)
+		}
+	}
+	return nil
+}
 
 func getSpecFromFile(path *string, spec interface{}) error {
@@ -342,7 +528,95 @@ func getSpecFromFile(path *string, spec interface{}) error {
 	if err != nil {
 		return err
 	}
-	return yaml.Unmarshal(bytes, spec)
+	return yaml.UnmarshalStrict(bytes, spec)
+}
+
+func getTestCases(path string) ([]*testCase, error) {
+	testCases := make([]*testCase, 0)
+	if err := getSpecFromFile(&path, &testCases); err != nil {
+		return nil, fmt.Errorf("parsing test cases: %w", err)
+	}
+	return testCases, nil
+}
+
+func validateTestCases(testCases []*testCase) error {
+	if len(testCases) == 0 {
+		return fmt.Errorf("no test cases defined")
+	}
+	for _, tc := range testCases {
+		if len(tc.Workloads) == 0 {
+			return fmt.Errorf("%s: no workloads defined", tc.Name)
+		}
+		if len(tc.WorkloadTemplate) == 0 {
+			return fmt.Errorf("%s: no ops defined", tc.Name)
+		}
+		// Make sure there's at least one CreatePods op with collectMetrics set to
+		// true in each workload. What's the point of running a performance
+		// benchmark if no statistics are collected for reporting?
+		if !tc.collectsMetrics() {
+			return fmt.Errorf("%s: no op in the workload template collects metrics", tc.Name)
+		}
+		// TODO(#93795): make sure each workload within a test case has a unique
+		// name? The name is used to identify the stats in benchmark reports.
+		// TODO(#94404): check for unused template parameters? Probably a typo.
+	}
+	return nil
+}
+
+func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
+	basePod := makeBasePod()
+	if cpo.PodTemplatePath != nil {
+		var err error
+		basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if cpo.PersistentVolumeClaimTemplatePath == nil {
+		return testutils.NewCustomCreatePodStrategy(basePod), nil
+	}
+	pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
+	if err != nil {
+		return nil, err
+	}
+	pvcTemplate, err := getPersistentVolumeClaimSpecFromFile(cpo.PersistentVolumeClaimTemplatePath)
+	if err != nil {
+		return nil, err
+	}
+	return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
+}
+
+func getNodeSpecFromFile(path *string) (*v1.Node, error) {
+	nodeSpec := &v1.Node{}
+	if err := getSpecFromFile(path, nodeSpec); err != nil {
+		return nil, fmt.Errorf("parsing Node: %w", err)
+	}
+	return nodeSpec, nil
+}
+
+func getPodSpecFromFile(path *string) (*v1.Pod, error) {
+	podSpec := &v1.Pod{}
+	if err := getSpecFromFile(path, podSpec); err != nil {
+		return nil, fmt.Errorf("parsing Pod: %w", err)
+	}
+	return podSpec, nil
+}
+
+func getPersistentVolumeSpecFromFile(path *string) (*v1.PersistentVolume, error) {
+	persistentVolumeSpec := &v1.PersistentVolume{}
+	if err := getSpecFromFile(path, persistentVolumeSpec); err != nil {
+		return nil, fmt.Errorf("parsing PersistentVolume: %w", err)
+	}
+	return persistentVolumeSpec, nil
+}
+
+func getPersistentVolumeClaimSpecFromFile(path *string) (*v1.PersistentVolumeClaim, error) {
+	persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
+	if err := getSpecFromFile(path, persistentVolumeClaimSpec); err != nil {
+		return nil, fmt.Errorf("parsing PersistentVolumeClaim: %w", err)
+	}
+	return persistentVolumeClaimSpec, nil
 }
func getCustomVolumeFactory(pvTemplate *v1.PersistentVolume) func(id int) *v1.PersistentVolume { func getCustomVolumeFactory(pvTemplate *v1.PersistentVolume) func(id int) *v1.PersistentVolume {
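Illustration only (not part of this PR): the switch from yaml.Unmarshal to yaml.UnmarshalStrict means a misspelled key in the config file is reported instead of being silently dropped. The standalone sketch below assumes the yaml package is sigs.k8s.io/yaml (which is what the strict variant suggests) and uses a hypothetical workloadSpec type as a stand-in for the real config structs.

package main

import (
	"fmt"

	"sigs.k8s.io/yaml"
)

// workloadSpec is a stand-in for the benchmark's real workload type.
type workloadSpec struct {
	Name   string
	Params map[string]int
}

func main() {
	// "parms" is a typo for "params"; UnmarshalStrict reports it as an unknown
	// field instead of ignoring it, which is the point of switching to it.
	doc := []byte("name: 500Nodes\nparms:\n  initNodes: 500\n")
	var w workloadSpec
	if err := yaml.UnmarshalStrict(doc, &w); err != nil {
		fmt.Println("config rejected:", err)
	}
}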

View File

@@ -17,6 +17,7 @@ limitations under the License.
 package benchmark
 
 import (
+	"context"
 	"encoding/json"
 	"flag"
 	"fmt"
@@ -147,18 +148,18 @@ type metricsCollectorConfig struct {
 // metricsCollector collects metrics from legacyregistry.DefaultGatherer.Gather() endpoint.
 // Currently only Histrogram metrics are supported.
 type metricsCollector struct {
-	metricsCollectorConfig
+	*metricsCollectorConfig
 	labels map[string]string
 }
 
-func newMetricsCollector(config metricsCollectorConfig, labels map[string]string) *metricsCollector {
+func newMetricsCollector(config *metricsCollectorConfig, labels map[string]string) *metricsCollector {
 	return &metricsCollector{
 		metricsCollectorConfig: config,
 		labels:                 labels,
 	}
 }
 
-func (*metricsCollector) run(stopCh chan struct{}) {
+func (*metricsCollector) run(ctx context.Context) {
 	// metricCollector doesn't need to start before the tests, so nothing to do here.
 }
@@ -231,7 +232,7 @@ func newThroughputCollector(podInformer coreinformers.PodInformer, labels map[st
 	}
 }
 
-func (tc *throughputCollector) run(stopCh chan struct{}) {
+func (tc *throughputCollector) run(ctx context.Context) {
 	podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...)
 	if err != nil {
 		klog.Fatalf("%v", err)
@@ -239,8 +240,9 @@ func (tc *throughputCollector) run(stopCh chan struct{}) {
 	lastScheduledCount := len(podsScheduled)
 	for {
 		select {
-		case <-stopCh:
+		case <-ctx.Done():
 			return
+		// TODO(#94665): use time.Ticker instead
 		case <-time.After(throughputSampleFrequency):
 			podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...)
 			if err != nil {
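Illustration only (not part of this PR): the collectors now stop via context cancellation rather than a stop channel. The self-contained sketch below, with a hypothetical fakeCollector in place of the real throughput/metrics collectors, mirrors the pattern runWorkload uses: derive a collector context, cancel it once the measured pods are scheduled, then gather the results.

package main

import (
	"context"
	"fmt"
	"time"
)

// fakeCollector stands in for throughputCollector/metricsCollector: run()
// samples until its context is cancelled, collect() returns what was gathered.
type fakeCollector struct {
	samples []int
	done    chan struct{}
}

func (c *fakeCollector) run(ctx context.Context) {
	defer close(c.done)
	for i := 0; ; i++ {
		select {
		case <-ctx.Done():
			return
		case <-time.After(10 * time.Millisecond):
			c.samples = append(c.samples, i)
		}
	}
}

func (c *fakeCollector) collect() []int {
	<-c.done // wait for run() to observe cancellation before reading samples
	return c.samples
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	c := &fakeCollector{done: make(chan struct{})}
	go c.run(ctx)
	time.Sleep(50 * time.Millisecond) // stand-in for waiting until pods are scheduled
	cancel()
	fmt.Println("collected samples:", len(c.collect()))
}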

View File

@@ -931,7 +931,7 @@ type CountToStrategy struct {
 }
 
 type TestNodePreparer interface {
-	PrepareNodes() error
+	PrepareNodes(nextNodeIndex int) error
 	CleanupNodes() error
 }
} }