Merge pull request #64266 from shyamjvs/measure-max-scheduler-throughput-metric

Automatic merge from submit-queue (batch tested with PRs 63232, 64257, 64183, 64266, 64134). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Measure scheduler throughput in density test

This is a step towards exposing scheduler-related metrics on [perf-dash](http://perf-dash.k8s.io/).
This particular PR adds scheduler throughput computation and makes the results available in our test artifacts.
That way, if you run experiments later, you'll have historical baseline data to compare against.
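For context, each throughput sample is just "pods newly scheduled during the last polling period, divided by the period in seconds". A minimal standalone sketch of that computation (the `computeThroughputs` helper, the counts, and the 10s period are illustrative, not taken from the test code):

```go
package main

import (
	"fmt"
	"time"
)

// computeThroughputs turns a series of cumulative scheduled-pod counts,
// sampled once per period, into per-period scheduling throughputs (pods/s).
func computeThroughputs(scheduledCounts []int, period time.Duration) []float64 {
	throughputs := make([]float64, 0, len(scheduledCounts))
	last := 0
	for _, count := range scheduledCounts {
		throughputs = append(throughputs, float64(count-last)/period.Seconds())
		last = count
	}
	return throughputs
}

func main() {
	// Hypothetical cumulative scheduled-pod counts observed every 10 seconds.
	counts := []int{120, 310, 540, 700}
	fmt.Println(computeThroughputs(counts, 10*time.Second))
	// Output: [12 19 23 16]
}
```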

xref https://github.com/kubernetes/kubernetes/issues/63493

fyi - @wojtek-t @davidopp @bsalamat @misterikkit 
cc @kubernetes/sig-scheduling-misc @kubernetes/sig-scalability-misc 

```release-note
NONE
```
Kubernetes Submit Queue 2018-05-25 08:24:22 -07:00 committed by GitHub
commit b8db949560
3 changed files with 43 additions and 25 deletions


@@ -206,21 +206,22 @@ func (l *PodStartupLatency) PrintJSON() string {
 	return PrettyPrintJSON(PodStartupLatencyToPerfData(l))
 }
-type SchedulingLatency struct {
-	Scheduling LatencyMetric `json:"scheduling"`
-	Binding LatencyMetric `json:"binding"`
-	Total LatencyMetric `json:"total"`
+type SchedulingMetrics struct {
+	SchedulingLatency LatencyMetric `json:"schedulingLatency"`
+	BindingLatency LatencyMetric `json:"bindingLatency"`
+	E2ELatency LatencyMetric `json:"e2eLatency"`
+	ThroughputSamples []float64 `json:"throughputSamples"`
 }
-func (l *SchedulingLatency) SummaryKind() string {
-	return "SchedulingLatency"
+func (l *SchedulingMetrics) SummaryKind() string {
+	return "SchedulingMetrics"
 }
-func (l *SchedulingLatency) PrintHumanReadable() string {
+func (l *SchedulingMetrics) PrintHumanReadable() string {
 	return PrettyPrintJSON(l)
 }
-func (l *SchedulingLatency) PrintJSON() string {
+func (l *SchedulingMetrics) PrintJSON() string {
 	return PrettyPrintJSON(l)
 }
@@ -438,9 +439,9 @@ func getMetrics(c clientset.Interface) (string, error) {
 	return string(body), nil
 }
-// Retrieves scheduler metrics information.
-func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
-	result := SchedulingLatency{}
+// Retrieves scheduler latency metrics.
+func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) {
+	result := SchedulingMetrics{}
 	// Check if master Node is registered
 	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
@@ -491,11 +492,11 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 		var metric *LatencyMetric = nil
 		switch sample.Metric[model.MetricNameLabel] {
 		case "scheduler_scheduling_algorithm_latency_microseconds":
-			metric = &result.Scheduling
+			metric = &result.SchedulingLatency
 		case "scheduler_binding_latency_microseconds":
-			metric = &result.Binding
+			metric = &result.BindingLatency
 		case "scheduler_e2e_scheduling_latency_microseconds":
-			metric = &result.Total
+			metric = &result.E2ELatency
 		}
 		if metric == nil {
 			continue
@@ -512,7 +513,7 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 }
 // Verifies (currently just by logging them) the scheduling latencies.
-func VerifySchedulerLatency(c clientset.Interface) (*SchedulingLatency, error) {
+func VerifySchedulerLatency(c clientset.Interface) (*SchedulingMetrics, error) {
 	latency, err := getSchedulingLatency(c)
 	if err != nil {
 		return nil, err
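The new `throughputSamples` field is what surfaces in the JSON summary written to the test artifacts. A rough sketch of the shape of that artifact, assuming `PrettyPrintJSON` is essentially a `json.MarshalIndent` wrapper; the `latencyMetric` stand-in and all numbers below are made up for illustration:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// latencyMetric is an illustrative stand-in for the framework's LatencyMetric.
type latencyMetric struct {
	Perc50 time.Duration `json:"Perc50"`
	Perc90 time.Duration `json:"Perc90"`
	Perc99 time.Duration `json:"Perc99"`
}

// schedulingMetrics mirrors the JSON tags of the SchedulingMetrics summary above.
type schedulingMetrics struct {
	SchedulingLatency latencyMetric `json:"schedulingLatency"`
	BindingLatency    latencyMetric `json:"bindingLatency"`
	E2ELatency        latencyMetric `json:"e2eLatency"`
	ThroughputSamples []float64     `json:"throughputSamples"`
}

func main() {
	m := schedulingMetrics{
		SchedulingLatency: latencyMetric{Perc50: 2 * time.Millisecond, Perc90: 6 * time.Millisecond, Perc99: 20 * time.Millisecond},
		BindingLatency:    latencyMetric{Perc50: 1 * time.Millisecond, Perc90: 4 * time.Millisecond, Perc99: 15 * time.Millisecond},
		E2ELatency:        latencyMetric{Perc50: 4 * time.Millisecond, Perc90: 12 * time.Millisecond, Perc99: 40 * time.Millisecond},
		ThroughputSamples: []float64{12, 19, 23, 16},
	}
	out, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(out)) // durations serialize as integer nanoseconds
}
```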


@@ -224,32 +224,42 @@ func density30AddonResourceVerifier(numNodes int) map[string]framework.ResourceC
 	return constraints
 }
-func logPodStartupStatus(c clientset.Interface, expectedPods int, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+func logPodStartupStatus(
+	c clientset.Interface,
+	expectedPods int,
+	observedLabels map[string]string,
+	period time.Duration,
+	scheduleThroughputs []float64,
+	stopCh chan struct{}) {
 	label := labels.SelectorFromSet(labels.Set(observedLabels))
 	podStore, err := testutils.NewPodStore(c, metav1.NamespaceAll, label, fields.Everything())
 	framework.ExpectNoError(err)
 	defer podStore.Stop()
 	ticker := time.NewTicker(period)
+	startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+	lastScheduledCount := startupStatus.Scheduled
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ticker.C:
-			pods := podStore.List()
-			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-			framework.Logf(startupStatus.String("Density"))
 		case <-stopCh:
-			pods := podStore.List()
-			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-			framework.Logf(startupStatus.String("Density"))
 			return
 		}
+		// Log status of the pods.
+		startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+		framework.Logf(startupStatus.String("Density"))
+		// Compute scheduling throughput for the latest time period.
+		throughput := float64(startupStatus.Scheduled-lastScheduledCount) / float64(period/time.Second)
+		scheduleThroughputs = append(scheduleThroughputs, throughput)
+		lastScheduledCount = startupStatus.Scheduled
 	}
 }
 // runDensityTest will perform a density test and return the time it took for
 // all pods to start
-func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer) time.Duration {
+func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer, scheduleThroughputs []float64) time.Duration {
 	defer GinkgoRecover()
 	// Create all secrets, configmaps and daemons.
@@ -274,7 +284,7 @@ func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer
 		}()
 	}
 	logStopCh := make(chan struct{})
-	go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, logStopCh)
+	go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, scheduleThroughputs, logStopCh)
 	wg.Wait()
 	startupTime := time.Since(startTime)
 	close(logStopCh)
@@ -355,6 +365,7 @@ var _ = SIGDescribe("Density", func() {
 	var nodeCpuCapacity int64
 	var nodeMemCapacity int64
 	var nodes *v1.NodeList
+	var scheduleThroughputs []float64
 	testCaseBaseName := "density"
 	missingMeasurements := 0
@@ -397,6 +408,7 @@ var _ = SIGDescribe("Density", func() {
 		latency, err := framework.VerifySchedulerLatency(c)
 		framework.ExpectNoError(err)
 		if err == nil {
+			latency.ThroughputSamples = scheduleThroughputs
 			summaries = append(summaries, latency)
 		}
 		summaries = append(summaries, testPhaseDurations)
@@ -643,7 +655,7 @@ var _ = SIGDescribe("Density", func() {
 				LogFunc: framework.Logf,
 			})
 		}
-		e2eStartupTime = runDensityTest(dConfig, testPhaseDurations)
+		e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, scheduleThroughputs)
 		if itArg.runLatencyTest {
 			By("Scheduling additional Pods to measure startup latencies")


@@ -655,6 +655,7 @@ type RCStartupStatus struct {
 	RunningButNotReady int
 	Waiting int
 	Pending int
+	Scheduled int
 	Unknown int
 	Inactive int
 	FailedContainers int
@@ -708,6 +709,10 @@ func ComputeRCStartupStatus(pods []*v1.Pod, expected int) RCStartupStatus {
 		} else if p.Status.Phase == v1.PodUnknown {
 			startupStatus.Unknown++
 		}
+		// Record count of scheduled pods (useful for computing scheduler throughput).
+		if p.Spec.NodeName != "" {
+			startupStatus.Scheduled++
+		}
 	}
 	return startupStatus
 }