Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 11:50:44 +00:00)
Merge pull request #64266 from shyamjvs/measure-max-scheduler-throughput-metric
Automatic merge from submit-queue (batch tested with PRs 63232, 64257, 64183, 64266, 64134). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Measure scheduler throughput in density test

This is a step towards exposing scheduler-related metrics on [perf-dash](http://perf-dash.k8s.io/). This PR adds scheduler throughput computation and makes the results available in our test artifacts, so any experiments you run will have historical baseline data to compare against.

xref https://github.com/kubernetes/kubernetes/issues/63493

fyi - @wojtek-t @davidopp @bsalamat @misterikkit
cc @kubernetes/sig-scheduling-misc @kubernetes/sig-scalability-misc

```release-note
NONE
```
This commit is contained in commit b8db949560.
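For context, the throughput samples introduced by this change are derived from the growth in the scheduled-pod count over each polling period. A minimal stand-alone sketch of that calculation (the helper name and the numbers below are illustrative, not taken from the commit):

```go
package main

import (
	"fmt"
	"time"
)

// computeThroughput converts the increase in the scheduled-pod count observed
// over one polling period into a pods-per-second sample. It mirrors the
// expression this PR adds to logPodStartupStatus, but is only an illustrative
// helper, not code from the commit.
func computeThroughput(scheduledNow, scheduledBefore int, period time.Duration) float64 {
	return float64(scheduledNow-scheduledBefore) / float64(period/time.Second)
}

func main() {
	// Hypothetical numbers: 50 pods scheduled at the previous tick, 200 now,
	// with a 10-second polling period -> 15 pods/s for this sample.
	var scheduleThroughputs []float64
	scheduleThroughputs = append(scheduleThroughputs, computeThroughput(200, 50, 10*time.Second))
	fmt.Println(scheduleThroughputs) // [15]
}
```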
```diff
@@ -206,21 +206,22 @@ func (l *PodStartupLatency) PrintJSON() string {
 	return PrettyPrintJSON(PodStartupLatencyToPerfData(l))
 }
 
-type SchedulingLatency struct {
-	Scheduling LatencyMetric `json:"scheduling"`
-	Binding    LatencyMetric `json:"binding"`
-	Total      LatencyMetric `json:"total"`
+type SchedulingMetrics struct {
+	SchedulingLatency LatencyMetric `json:"schedulingLatency"`
+	BindingLatency    LatencyMetric `json:"bindingLatency"`
+	E2ELatency        LatencyMetric `json:"e2eLatency"`
+	ThroughputSamples []float64     `json:"throughputSamples"`
 }
 
-func (l *SchedulingLatency) SummaryKind() string {
-	return "SchedulingLatency"
+func (l *SchedulingMetrics) SummaryKind() string {
+	return "SchedulingMetrics"
 }
 
-func (l *SchedulingLatency) PrintHumanReadable() string {
+func (l *SchedulingMetrics) PrintHumanReadable() string {
 	return PrettyPrintJSON(l)
 }
 
-func (l *SchedulingLatency) PrintJSON() string {
+func (l *SchedulingMetrics) PrintJSON() string {
 	return PrettyPrintJSON(l)
 }
 
@@ -438,9 +439,9 @@ func getMetrics(c clientset.Interface) (string, error) {
 	return string(body), nil
 }
 
-// Retrieves scheduler metrics information.
-func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
-	result := SchedulingLatency{}
+// Retrieves scheduler latency metrics.
+func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) {
+	result := SchedulingMetrics{}
 
 	// Check if master Node is registered
 	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
@@ -491,11 +492,11 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 		var metric *LatencyMetric = nil
 		switch sample.Metric[model.MetricNameLabel] {
 		case "scheduler_scheduling_algorithm_latency_microseconds":
-			metric = &result.Scheduling
+			metric = &result.SchedulingLatency
 		case "scheduler_binding_latency_microseconds":
-			metric = &result.Binding
+			metric = &result.BindingLatency
 		case "scheduler_e2e_scheduling_latency_microseconds":
-			metric = &result.Total
+			metric = &result.E2ELatency
 		}
 		if metric == nil {
 			continue
@@ -512,7 +513,7 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
 }
 
 // Verifies (currently just by logging them) the scheduling latencies.
-func VerifySchedulerLatency(c clientset.Interface) (*SchedulingLatency, error) {
+func VerifySchedulerLatency(c clientset.Interface) (*SchedulingMetrics, error) {
 	latency, err := getSchedulingLatency(c)
 	if err != nil {
 		return nil, err
@@ -224,32 +224,42 @@ func density30AddonResourceVerifier(numNodes int) map[string]framework.ResourceC
 	return constraints
 }
 
-func logPodStartupStatus(c clientset.Interface, expectedPods int, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+func logPodStartupStatus(
+	c clientset.Interface,
+	expectedPods int,
+	observedLabels map[string]string,
+	period time.Duration,
+	scheduleThroughputs []float64,
+	stopCh chan struct{}) {
+
 	label := labels.SelectorFromSet(labels.Set(observedLabels))
 	podStore, err := testutils.NewPodStore(c, metav1.NamespaceAll, label, fields.Everything())
 	framework.ExpectNoError(err)
 	defer podStore.Stop()
 
 	ticker := time.NewTicker(period)
+	startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+	lastScheduledCount := startupStatus.Scheduled
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ticker.C:
-			pods := podStore.List()
-			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-			framework.Logf(startupStatus.String("Density"))
 		case <-stopCh:
-			pods := podStore.List()
-			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
-			framework.Logf(startupStatus.String("Density"))
 			return
 		}
+		// Log status of the pods.
+		startupStatus := testutils.ComputeRCStartupStatus(podStore.List(), expectedPods)
+		framework.Logf(startupStatus.String("Density"))
+		// Compute scheduling throughput for the latest time period.
+		throughput := float64(startupStatus.Scheduled-lastScheduledCount) / float64(period/time.Second)
+		scheduleThroughputs = append(scheduleThroughputs, throughput)
+		lastScheduledCount = startupStatus.Scheduled
	}
 }
 
 // runDensityTest will perform a density test and return the time it took for
 // all pods to start
-func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer) time.Duration {
+func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTimer, scheduleThroughputs []float64) time.Duration {
 	defer GinkgoRecover()
 
 	// Create all secrets, configmaps and daemons.
@@ -274,7 +284,7 @@ func runDensityTest(dtc DensityTestConfig, testPhaseDurations *timer.TestPhaseTi
 		}()
 	}
 	logStopCh := make(chan struct{})
-	go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, logStopCh)
+	go logPodStartupStatus(dtc.ClientSets[0], dtc.PodCount, map[string]string{"type": "densityPod"}, dtc.PollInterval, scheduleThroughputs, logStopCh)
 	wg.Wait()
 	startupTime := time.Since(startTime)
 	close(logStopCh)
@@ -355,6 +365,7 @@ var _ = SIGDescribe("Density", func() {
 	var nodeCpuCapacity int64
 	var nodeMemCapacity int64
 	var nodes *v1.NodeList
+	var scheduleThroughputs []float64
 
 	testCaseBaseName := "density"
 	missingMeasurements := 0
@@ -397,6 +408,7 @@ var _ = SIGDescribe("Density", func() {
 		latency, err := framework.VerifySchedulerLatency(c)
 		framework.ExpectNoError(err)
 		if err == nil {
+			latency.ThroughputSamples = scheduleThroughputs
 			summaries = append(summaries, latency)
 		}
 		summaries = append(summaries, testPhaseDurations)
@@ -643,7 +655,7 @@ var _ = SIGDescribe("Density", func() {
 				LogFunc: framework.Logf,
 			})
 		}
-		e2eStartupTime = runDensityTest(dConfig, testPhaseDurations)
+		e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, scheduleThroughputs)
 		if itArg.runLatencyTest {
 			By("Scheduling additional Pods to measure startup latencies")
 
@@ -655,6 +655,7 @@ type RCStartupStatus struct {
 	RunningButNotReady int
 	Waiting            int
 	Pending            int
+	Scheduled          int
 	Unknown            int
 	Inactive           int
 	FailedContainers   int
@@ -708,6 +709,10 @@ func ComputeRCStartupStatus(pods []*v1.Pod, expected int) RCStartupStatus {
 		} else if p.Status.Phase == v1.PodUnknown {
 			startupStatus.Unknown++
 		}
+		// Record count of scheduled pods (useful for computing scheduler throughput).
+		if p.Spec.NodeName != "" {
+			startupStatus.Scheduled++
+		}
 	}
 	return startupStatus
 }
```
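For reference, the json tags on the renamed struct determine the shape of the summary written to the test artifacts. A small self-contained sketch of that serialization follows; the LatencyMetric fields here are reduced placeholders (the framework's real type carries latency percentiles), and the sample values are hypothetical:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// LatencyMetric is a reduced stand-in for the framework type, kept only so the
// example compiles; the fields are placeholders for the real percentile data.
type LatencyMetric struct {
	Perc50 string `json:"Perc50"`
	Perc90 string `json:"Perc90"`
	Perc99 string `json:"Perc99"`
}

// SchedulingMetrics reproduces the json tags introduced by this change, which
// define the layout of the scheduler summary in the test artifacts.
type SchedulingMetrics struct {
	SchedulingLatency LatencyMetric `json:"schedulingLatency"`
	BindingLatency    LatencyMetric `json:"bindingLatency"`
	E2ELatency        LatencyMetric `json:"e2eLatency"`
	ThroughputSamples []float64     `json:"throughputSamples"`
}

func main() {
	// Hypothetical throughput samples, just to show where they land in the JSON.
	m := SchedulingMetrics{ThroughputSamples: []float64{14.2, 15.0, 13.8}}
	out, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(out))
}
```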