Merge pull request #125504 from macsko/scheduler_perf_gated_pods_test

scheduler_perf: Measure performance of scheduling when many gated pods
Kubernetes Prow Robot 2024-06-18 12:40:02 -07:00 committed by GitHub
commit 7dc30dce15
4 changed files with 86 additions and 0 deletions

config/performance-config.yaml

@@ -1044,3 +1044,34 @@
measurePods: 2500
measureClaims: 500 # must be measurePods / 5
maxClaimsPerNode: 2
# This test case simulates scheduling when many pods are gated and others are gradually deleted (see the pacing sketch after this test case).
# https://github.com/kubernetes/kubernetes/issues/124384
- name: SchedulingWhileGated
defaultPodTemplatePath: config/templates/light-pod.yaml
workloadTemplate:
- opcode: createNodes
count: 1
nodeTemplatePath: config/templates/node-with-name.yaml
# Create pods that will stay gated to the end of the test.
- opcode: createPods
countParam: $gatedPods
podTemplatePath: config/templates/gated-pod.yaml
skipWaitToCompletion: true
# Wait to make sure gated pods are enqueued in the scheduler.
- opcode: sleep
duration: 5s
# Create pods that will be gradually deleted after being scheduled.
- opcode: createPods
countParam: $deletingPods
deletePodsPerSecond: 50
- opcode: createPods
countParam: $measurePods
collectMetrics: true
workloads:
- name: 1Node
labels: [performance, fast]
params:
gatedPods: 10000
deletingPods: 20000
measurePods: 20000
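
With these parameters the deletion phase is paced by deletePodsPerSecond: 50, i.e. one deletion every 20ms, so draining the 20000 deletingPods takes roughly 400 seconds, during which the 10000 gated pods sit in the scheduling queue. A minimal sketch of that arithmetic, using only the Go standard library:

package main

import (
	"fmt"
	"time"
)

func main() {
	const deletePodsPerSecond = 50
	const deletingPods = 20000

	// The harness derives its ticker period from the configured rate.
	interval := time.Second / time.Duration(deletePodsPerSecond)
	drainTime := time.Duration(deletingPods) * interval

	fmt.Println(interval, drainTime) // 20ms 6m40s
}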

config/templates/gated-pod.yaml

@@ -0,0 +1,10 @@
apiVersion: v1
kind: Pod
metadata:
generateName: gated-pod-
spec:
schedulingGates:
- name: test.k8s.io/hold
containers:
- image: registry.k8s.io/pause:3.10
name: pause
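
The scheduler keeps any pod with a non-empty spec.schedulingGates out of scheduling until every gate is removed; since the test never lifts test.k8s.io/hold, these pods stay pending for the whole run. For reference, a minimal client-go sketch of lifting such a gate (the ungate helper is illustrative; retry-on-conflict is omitted for brevity):

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// ungate clears a pod's scheduling gates so the scheduler will consider it.
func ungate(ctx context.Context, client kubernetes.Interface, ns, name string) error {
	pod, err := client.CoreV1().Pods(ns).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return err
	}
	pod.Spec.SchedulingGates = nil
	_, err = client.CoreV1().Pods(ns).Update(ctx, pod, metav1.UpdateOptions{})
	return err
}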

config/templates/light-pod.yaml

@@ -0,0 +1,9 @@
apiVersion: v1
kind: Pod
metadata:
generateName: light-pod-
spec:
containers:
- image: registry.k8s.io/pause:3.10
name: pause
terminationGracePeriodSeconds: 0

scheduler_perf.go

@@ -19,6 +19,7 @@ package benchmark
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
@@ -37,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -464,6 +466,9 @@ type createPodsOp struct {
// Optional
PersistentVolumeTemplatePath *string
PersistentVolumeClaimTemplatePath *string
// Number of pods to be deleted per second after they have been scheduled. If set to 0, pods are not deleted.
// Optional
DeletePodsPerSecond int
}
func (cpo *createPodsOp) isValid(allowParameterization bool) error {
@@ -479,6 +484,9 @@
// use-cases right now.
return fmt.Errorf("collectMetrics and skipWaitToCompletion cannot be true at the same time")
}
if cpo.DeletePodsPerSecond < 0 {
return fmt.Errorf("invalid DeletePodsPerSecond=%d; should be non-negative", cpo.DeletePodsPerSecond)
}
return nil
}
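
Note that 0 passes validation: per the field comment, it disables deletion. The runWorkload path below additionally guards with DeletePodsPerSecond > 0 before building the ticker, which also avoids a divide-by-zero panic when computing the tick period. A hypothetical helper making that guard explicit (assumes only the time package):

import "time"

// tickerPeriod mirrors the guard used below: time.Second / time.Duration(0)
// panics with "integer divide by zero", so a zero rate must short-circuit.
func tickerPeriod(perSecond int) (time.Duration, bool) {
	if perSecond <= 0 {
		return 0, false // deletion disabled
	}
	return time.Second / time.Duration(perSecond), true
}
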
@@ -1030,6 +1038,34 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
mu.Unlock()
}
if concreteOp.DeletePodsPerSecond > 0 {
pods, err := podInformer.Lister().Pods(namespace).List(labels.Everything())
if err != nil {
tCtx.Fatalf("op %d: error in listing scheduled pods in the namespace: %v", opIndex, err)
}
ticker := time.NewTicker(time.Second / time.Duration(concreteOp.DeletePodsPerSecond))
defer ticker.Stop()
wg.Add(1)
go func() {
defer wg.Done()
for i := 0; i < len(pods); i++ {
select {
case <-ticker.C:
if err := tCtx.Client().CoreV1().Pods(namespace).Delete(tCtx, pods[i].Name, metav1.DeleteOptions{}); err != nil {
if errors.Is(err, context.Canceled) {
return
}
tCtx.Errorf("op %d: unable to delete pod %v: %v", opIndex, pods[i].Name, err)
}
case <-tCtx.Done():
return
}
}
}()
}
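
Two details of this loop matter for robustness: selecting on tCtx.Done() lets the goroutine exit promptly at teardown, and the errors.Is(err, context.Canceled) check keeps cancellation-induced delete failures from being counted as test errors. The same pacing pattern, reduced to its essentials (drain and del are illustrative names, not part of this change):

import (
	"context"
	"errors"
	"time"
)

// drain deletes items at a fixed per-second rate until the list is
// exhausted or the context is cancelled.
func drain(ctx context.Context, items []string, perSecond int, del func(string) error) {
	ticker := time.NewTicker(time.Second / time.Duration(perSecond))
	defer ticker.Stop()
	for _, item := range items {
		select {
		case <-ticker.C:
			if err := del(item); err != nil {
				if errors.Is(err, context.Canceled) {
					return // the caller is shutting down
				}
				// Report and continue: one failed deletion should
				// not stop the drain.
			}
		case <-ctx.Done():
			return
		}
	}
}
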
if !concreteOp.SkipWaitToCompletion {
// SkipWaitToCompletion=false indicates this step has waited for the Pods to be scheduled.
// So we reset the metrics in global registry; otherwise metrics gathered in this step