Merge pull request #64266 from shyamjvs/measure-max-scheduler-throughput-metric
Automatic merge from submit-queue (batch tested with PRs 63232, 64257, 64183, 64266, 64134). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Measure scheduler throughput in density test

This is a step towards exposing scheduler-related metrics on [perf-dash](http://perf-dash.k8s.io/). This PR adds scheduler throughput computation and makes the results available in our test artifacts, so if you run experiments you'll have historical baseline data to compare against.

xref https://github.com/kubernetes/kubernetes/issues/63493

fyi - @wojtek-t @davidopp @bsalamat @misterikkit
cc @kubernetes/sig-scheduling-misc @kubernetes/sig-scalability-misc

```release-note
NONE
```
Commit b8db949560
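For orientation before the diff: the change samples scheduler throughput as pods-scheduled-per-second over each polling period. Below is a minimal, self-contained sketch of that idea; the function name and values are illustrative, not the test framework's API. The diff does the same arithmetic inline in the density test's pod-startup logger and collects the samples into a slice.

```go
package main

import (
	"fmt"
	"time"
)

// sampleThroughput appends one pods-scheduled-per-second sample for the latest
// period, given the current and previous scheduled-pod counts. (Illustrative only.)
func sampleThroughput(samples []float64, scheduled, lastScheduled int, period time.Duration) []float64 {
	throughput := float64(scheduled-lastScheduled) / period.Seconds()
	return append(samples, throughput)
}

func main() {
	period := 10 * time.Second
	var samples []float64
	lastScheduled := 0

	// Simulated scheduled-pod counts observed at successive ticks.
	for _, scheduled := range []int{150, 420, 900} {
		samples = sampleThroughput(samples, scheduled, lastScheduled, period)
		lastScheduled = scheduled
	}
	fmt.Println(samples) // [15 27 48]
}
```

The framework-side changes (the renamed `SchedulingMetrics` summary) come first.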
```diff
@@ -206,21 +206,22 @@ func (l *PodStartupLatency) PrintJSON() string {
 	return PrettyPrintJSON(PodStartupLatencyToPerfData(l))
 }
 
-type SchedulingLatency struct {
-	Scheduling LatencyMetric `json:"scheduling"`
-	Binding    LatencyMetric `json:"binding"`
-	Total      LatencyMetric `json:"total"`
+type SchedulingMetrics struct {
+	SchedulingLatency LatencyMetric `json:"schedulingLatency"`
+	BindingLatency    LatencyMetric `json:"bindingLatency"`
+	E2ELatency        LatencyMetric `json:"e2eLatency"`
+	ThroughputSamples []float64     `json:"throughputSamples"`
 }
 
-func (l *SchedulingLatency) SummaryKind() string {
-	return "SchedulingLatency"
+func (l *SchedulingMetrics) SummaryKind() string {
+	return "SchedulingMetrics"
 }
 
-func (l *SchedulingLatency) PrintHumanReadable() string {
+func (l *SchedulingMetrics) PrintHumanReadable() string {
 	return PrettyPrintJSON(l)
 }
 
-func (l *SchedulingLatency) PrintJSON() string {
+func (l *SchedulingMetrics) PrintJSON() string {
 	return PrettyPrintJSON(l)
 }
 
@@ -438,9 +439,9 @@ func getMetrics(c clientset.Interface) (string, error) {
 	return string(body), nil
 }
 
-// Retrieves scheduler metrics information.
-func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
-	result := SchedulingLatency{}
+// Retrieves scheduler latency metrics.
+func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) {
+	result := SchedulingMetrics{}
 
 	// Check if master Node is registered
 	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
@@ -491,11 +492,11 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 		var metric *LatencyMetric = nil
 		switch sample.Metric[model.MetricNameLabel] {
 		case "scheduler_scheduling_algorithm_latency_microseconds":
-			metric = &result.Scheduling
+			metric = &result.SchedulingLatency
 		case "scheduler_binding_latency_microseconds":
-			metric = &result.Binding
+			metric = &result.BindingLatency
 		case "scheduler_e2e_scheduling_latency_microseconds":
-			metric = &result.Total
+			metric = &result.E2ELatency
 		}
 		if metric == nil {
 			continue
@@ -512,7 +513,7 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 }
 
 // Verifies (currently just by logging them) the scheduling latencies.
-func VerifySchedulerLatency(c clientset.Interface) (*SchedulingLatency, error) {
+func VerifySchedulerLatency(c clientset.Interface) (*SchedulingMetrics, error) {
 	latency, err := getSchedulingLatency(c)
 	if err != nil {
 		return nil, err
```
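With the rename to `SchedulingMetrics`, the new `ThroughputSamples` field is serialized by `PrintJSON` alongside the latency percentiles, which is how the samples end up in the test artifacts. A rough, self-contained sketch of the resulting JSON shape, assuming a `LatencyMetric` that only carries percentiles (the real framework type may differ):

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// LatencyMetric is an assumed stand-in for the framework type (percentiles only).
type LatencyMetric struct {
	Perc50 time.Duration `json:"Perc50"`
	Perc90 time.Duration `json:"Perc90"`
	Perc99 time.Duration `json:"Perc99"`
}

// SchedulingMetrics mirrors the struct introduced in this change.
type SchedulingMetrics struct {
	SchedulingLatency LatencyMetric `json:"schedulingLatency"`
	BindingLatency    LatencyMetric `json:"bindingLatency"`
	E2ELatency        LatencyMetric `json:"e2eLatency"`
	ThroughputSamples []float64     `json:"throughputSamples"`
}

func main() {
	m := SchedulingMetrics{
		E2ELatency:        LatencyMetric{Perc50: 12 * time.Millisecond, Perc90: 30 * time.Millisecond, Perc99: 80 * time.Millisecond},
		ThroughputSamples: []float64{15, 27, 48},
	}
	out, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(out)) // throughputSamples appears alongside the latency fields
}
```

The density test changes follow.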
```diff
@@ -224,32 +224,42 @@ func density30AddonResourceVerifier(numNodes int) map[string]framework.ResourceC
 	return constraints
 }
 
-func logPodStartupStatus(c clientset.Interface, expectedPods int, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+func logPodStartupStatus(
+	c clientset.Interface,
+	expectedPods int,
+	observedLabels map[string]string,
+	period time.Duration,
+	scheduleThroughputs []float64,
+	stopCh chan struct{}) {
+
 	label := labels.SelectorFromSet(labels.Set(observedLabels))
 	podStore, err := testutils.NewPodStore(c, metav1.NamespaceAll, label, fields.Everything())
 	framework.ExpectNoError(err)
 	defer podStore.Stop()
+
 	ticker := time.NewTicker(period)
+	startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+	lastScheduledCount := startupStatus.Scheduled
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ticker.C:
-			pods := podStore.List()
-			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-			framework.Logf(startupStatus.String("Density"))
 		case <-stopCh:
-			pods := podStore.List()
-			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-			framework.Logf(startupStatus.String("Density"))
 			return
 		}
+		// Log status of the pods.
+		startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+		framework.Logf(startupStatus.String("Density"))
+		// Compute scheduling throughput for the latest time period.
+		throughput := float64(startupStatus.Scheduled-lastScheduledCount) / float64(period/time.Second)
+		scheduleThroughputs = append(scheduleThroughputs, throughput)
+		lastScheduledCount = startupStatus.Scheduled
 	}
 }
 
 // runDensityTest will perform a density test and return the time it took for
 // all pods to start
-func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer) time.Duration {
+func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer, scheduleThroughputs []float64) time.Duration {
 	defer GinkgoRecover()
 
 	// Create all secrets, configmaps and daemons.
@@ -274,7 +284,7 @@ func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTi
 		}()
 	}
 	logStopCh := make(chan struct{})
-	go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, logStopCh)
+	go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, scheduleThroughputs, logStopCh)
 	wg.Wait()
 	startupTime := time.Since(startTime)
 	close(logStopCh)
@@ -355,6 +365,7 @@ var _ = SIGDescribe("Density", func() {
 	var nodeCpuCapacity int64
 	var nodeMemCapacity int64
 	var nodes *v1.NodeList
+	var scheduleThroughputs []float64
 
 	testCaseBaseName := "density"
 	missingMeasurements := 0
@@ -397,6 +408,7 @@ var _ = SIGDescribe("Density", func() {
 		latency, err := framework.VerifySchedulerLatency(c)
 		framework.ExpectNoError(err)
 		if err == nil {
+			latency.ThroughputSamples = scheduleThroughputs
 			summaries = append(summaries, latency)
 		}
 		summaries = append(summaries, testPhaseDurations)
@@ -643,7 +655,7 @@ var _ = SIGDescribe("Density", func() {
 				LogFunc: framework.Logf,
 			})
 		}
-		e2eStartupTime = runDensityTest(dConfig, testPhaseDurations)
+		e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, scheduleThroughputs)
 		if itArg.runLatencyTest {
 			By("Scheduling additional Pods to measure startup latencies")
 
```
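Given the branch name (measure-max-scheduler-throughput-metric), the natural downstream reduction over these samples is the maximum observed throughput; a trivial, purely illustrative helper (not part of this PR) would look like this:

```go
package main

import "fmt"

// maxThroughput returns the largest per-period sample, or 0 if there are none.
func maxThroughput(samples []float64) float64 {
	max := 0.0
	for _, s := range samples {
		if s > max {
			max = s
		}
	}
	return max
}

func main() {
	fmt.Println(maxThroughput([]float64{15, 27, 48})) // 48
}
```

The remaining hunks add the `Scheduled` count to the test utils' RC startup status.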
```diff
@@ -655,6 +655,7 @@ type RCStartupStatus struct {
 	RunningButNotReady int
 	Waiting            int
 	Pending            int
+	Scheduled          int
 	Unknown            int
 	Inactive           int
 	FailedContainers   int
@@ -708,6 +709,10 @@ func ComputeRCStartupStatus(pods []*v1.Pod, expected int) RCStartupStatus {
 		} else if p.Status.Phase == v1.PodUnknown {
 			startupStatus.Unknown++
 		}
+		// Record count of scheduled pods (useful for computing scheduler throughput).
+		if p.Spec.NodeName != "" {
+			startupStatus.Scheduled++
+		}
 	}
 	return startupStatus
 }
```