Merge pull request #64266 from shyamjvs/measure-max-scheduler-throughput-metric

Automatic merge from submit-queue (batch tested with PRs 63232, 64257, 64183, 64266, 64134). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Measure scheduler throughput in density test

This is a step towards exposing scheduler-related metrics on [perf-dash](http://perf-dash.k8s.io/).
This particular PR adds scheduler throughput computation and makes the results available in our test artifacts.
That way, when you run experiments, you'll have historical baseline data to compare against.
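
For context, a minimal sketch of the quantity being recorded: pods newly scheduled during one poll interval, divided by the interval length in seconds. The function and numbers below are illustrative only, not the actual test code.

```go
// Illustrative sketch (not the e2e test code): scheduling throughput over one
// poll interval = newly scheduled pods / interval length in seconds.
package main

import (
    "fmt"
    "time"
)

func schedulingThroughput(scheduledNow, scheduledBefore int, period time.Duration) float64 {
    return float64(scheduledNow-scheduledBefore) / period.Seconds()
}

func main() {
    // E.g. 150 pods scheduled at the previous tick, 200 now, with a 10s poll interval.
    fmt.Printf("throughput: %.1f pods/s\n", schedulingThroughput(200, 150, 10*time.Second))
    // Prints: throughput: 5.0 pods/s
}
```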

xref https://github.com/kubernetes/kubernetes/issues/63493

fyi - @wojtek-t @davidopp @bsalamat @misterikkit 
cc @kubernetes/sig-scheduling-misc @kubernetes/sig-scalability-misc 

```release-note
NONE
```
Kubernetes Submit Queue, 2018-05-25 08:24:22 -07:00 (committed by GitHub)
commit b8db949560
3 changed files with 43 additions and 25 deletions

First file: the e2e framework's scheduler metrics summary, where `SchedulingLatency` is renamed to `SchedulingMetrics` and gains a `ThroughputSamples` field.

```diff
@@ -206,21 +206,22 @@ func (l *PodStartupLatency) PrintJSON() string {
     return PrettyPrintJSON(PodStartupLatencyToPerfData(l))
 }
-type SchedulingLatency struct {
-    Scheduling LatencyMetric `json:"scheduling"`
-    Binding LatencyMetric `json:"binding"`
-    Total LatencyMetric `json:"total"`
+type SchedulingMetrics struct {
+    SchedulingLatency LatencyMetric `json:"schedulingLatency"`
+    BindingLatency LatencyMetric `json:"bindingLatency"`
+    E2ELatency LatencyMetric `json:"e2eLatency"`
+    ThroughputSamples []float64 `json:"throughputSamples"`
 }
-func (l *SchedulingLatency) SummaryKind() string {
-    return "SchedulingLatency"
+func (l *SchedulingMetrics) SummaryKind() string {
+    return "SchedulingMetrics"
 }
-func (l *SchedulingLatency) PrintHumanReadable() string {
+func (l *SchedulingMetrics) PrintHumanReadable() string {
     return PrettyPrintJSON(l)
 }
-func (l *SchedulingLatency) PrintJSON() string {
+func (l *SchedulingMetrics) PrintJSON() string {
     return PrettyPrintJSON(l)
 }
@@ -438,9 +439,9 @@ func getMetrics(c clientset.Interface) (string, error) {
     return string(body), nil
 }
-// Retrieves scheduler metrics information.
-func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
-    result := SchedulingLatency{}
+// Retrieves scheduler latency metrics.
+func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) {
+    result := SchedulingMetrics{}
     // Check if master Node is registered
     nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
@@ -491,11 +492,11 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
         var metric *LatencyMetric = nil
         switch sample.Metric[model.MetricNameLabel] {
         case "scheduler_scheduling_algorithm_latency_microseconds":
-            metric = &result.Scheduling
+            metric = &result.SchedulingLatency
         case "scheduler_binding_latency_microseconds":
-            metric = &result.Binding
+            metric = &result.BindingLatency
         case "scheduler_e2e_scheduling_latency_microseconds":
-            metric = &result.Total
+            metric = &result.E2ELatency
         }
         if metric == nil {
             continue
@@ -512,7 +513,7 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 }
 // Verifies (currently just by logging them) the scheduling latencies.
-func VerifySchedulerLatency(c clientset.Interface) (*SchedulingLatency, error) {
+func VerifySchedulerLatency(c clientset.Interface) (*SchedulingMetrics, error) {
     latency, err := getSchedulingLatency(c)
     if err != nil {
         return nil, err
```
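
Since the summary is emitted with `PrettyPrintJSON`, these field names are exactly what ends up in the test artifacts. Below is a rough, stand-alone sketch of the resulting JSON shape; the `LatencyMetric` type and the sample values are simplified placeholders, not the framework's definitions.

```go
// Illustrative only: approximate shape of the SchedulingMetrics summary JSON.
package main

import (
    "encoding/json"
    "fmt"
    "time"
)

// Simplified stand-in for the framework's LatencyMetric.
type LatencyMetric struct {
    Perc50 time.Duration `json:"Perc50"`
    Perc90 time.Duration `json:"Perc90"`
    Perc99 time.Duration `json:"Perc99"`
}

// Field names and JSON tags taken from the diff above.
type SchedulingMetrics struct {
    SchedulingLatency LatencyMetric `json:"schedulingLatency"`
    BindingLatency    LatencyMetric `json:"bindingLatency"`
    E2ELatency        LatencyMetric `json:"e2eLatency"`
    ThroughputSamples []float64     `json:"throughputSamples"`
}

func main() {
    m := SchedulingMetrics{ThroughputSamples: []float64{4.8, 5.1, 5.0}} // placeholder samples
    out, _ := json.MarshalIndent(m, "", "  ")
    fmt.Println(string(out))
}
```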

Second file: the density test. `logPodStartupStatus` now computes a scheduling-throughput sample on every poll interval, and the collected samples are attached to the scheduler metrics summary.

```diff
@@ -224,32 +224,42 @@ func density30AddonResourceVerifier(numNodes int) map[string]framework.ResourceC
     return constraints
 }
-func logPodStartupStatus(c clientset.Interface, expectedPods int, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+func logPodStartupStatus(
+    c clientset.Interface,
+    expectedPods int,
+    observedLabels map[string]string,
+    period time.Duration,
+    scheduleThroughputs []float64,
+    stopCh chan struct{}) {
     label := labels.SelectorFromSet(labels.Set(observedLabels))
     podStore, err := testutils.NewPodStore(c, metav1.NamespaceAll, label, fields.Everything())
     framework.ExpectNoError(err)
     defer podStore.Stop()
     ticker := time.NewTicker(period)
+    startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+    lastScheduledCount := startupStatus.Scheduled
     defer ticker.Stop()
     for {
         select {
         case <-ticker.C:
-            pods := podStore.List()
-            startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-            framework.Logf(startupStatus.String("Density"))
         case <-stopCh:
             pods := podStore.List()
             startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
             framework.Logf(startupStatus.String("Density"))
             return
         }
+        // Log status of the pods.
+        startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+        framework.Logf(startupStatus.String("Density"))
+        // Compute scheduling throughput for the latest time period.
+        throughput := float64(startupStatus.Scheduled-lastScheduledCount) / float64(period/time.Second)
+        scheduleThroughputs = append(scheduleThroughputs, throughput)
+        lastScheduledCount = startupStatus.Scheduled
     }
 }
 // runDensityTest will perform a density test and return the time it took for
 // all pods to start
-func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer) time.Duration {
+func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer, scheduleThroughputs []float64) time.Duration {
     defer GinkgoRecover()
     // Create all secrets, configmaps and daemons.
@@ -274,7 +284,7 @@ func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTi
         }()
     }
     logStopCh := make(chan struct{})
-    go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, logStopCh)
+    go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, scheduleThroughputs, logStopCh)
     wg.Wait()
     startupTime := time.Since(startTime)
     close(logStopCh)
@@ -355,6 +365,7 @@ var _ = SIGDescribe("Density", func() {
     var nodeCpuCapacity int64
     var nodeMemCapacity int64
     var nodes *v1.NodeList
+    var scheduleThroughputs []float64
     testCaseBaseName := "density"
     missingMeasurements := 0
@@ -397,6 +408,7 @@ var _ = SIGDescribe("Density", func() {
         latency, err := framework.VerifySchedulerLatency(c)
         framework.ExpectNoError(err)
         if err == nil {
+            latency.ThroughputSamples = scheduleThroughputs
             summaries = append(summaries, latency)
         }
         summaries = append(summaries, testPhaseDurations)
@@ -643,7 +655,7 @@ var _ = SIGDescribe("Density", func() {
             LogFunc: framework.Logf,
         })
     }
-    e2eStartupTime = runDensityTest(dConfig, testPhaseDurations)
+    e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, scheduleThroughputs)
     if itArg.runLatencyTest {
         By("Scheduling additional Pods to measure startup latencies")
```

Third file: the test utilities. `RCStartupStatus` gains a `Scheduled` count, incremented for every pod that has already been bound to a node.

```diff
@@ -655,6 +655,7 @@ type RCStartupStatus struct {
     RunningButNotReady int
     Waiting int
     Pending int
+    Scheduled int
     Unknown int
     Inactive int
     FailedContainers int
@@ -708,6 +709,10 @@ func ComputeRCStartupStatus(pods []*v1.Pod, expected int) RCStartupStatus {
         } else if p.Status.Phase == v1.PodUnknown {
             startupStatus.Unknown++
         }
+        // Record count of scheduled pods (useful for computing scheduler throughput).
+        if p.Spec.NodeName != "" {
+            startupStatus.Scheduled++
+        }
     }
     return startupStatus
 }
```
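
For reference, the "scheduled" condition used above is simply that the scheduler has set the pod's `Spec.NodeName`. A small stand-alone sketch of that check (assumes the `k8s.io/api` module is available):

```go
// Sketch: count pods that the scheduler has already bound to a node.
package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
)

func countScheduled(pods []*v1.Pod) int {
    n := 0
    for _, p := range pods {
        if p.Spec.NodeName != "" { // same condition as the diff above
            n++
        }
    }
    return n
}

func main() {
    pods := []*v1.Pod{
        {Spec: v1.PodSpec{NodeName: "node-1"}}, // already scheduled
        {},                                     // not yet scheduled
    }
    fmt.Println("scheduled pods:", countScheduled(pods))
}
```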