Update buckets for scheduler framework latency histograms.

This commit is contained in:
Cong Liu 2019-11-13 16:35:59 -05:00
parent e7ce8d8a6f
commit 8995c1e030
3 changed files with 22 additions and 18 deletions

View File

@ -159,7 +159,7 @@ var (
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
) )
SchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram( SchedulingAlgorithmPreemptionEvaluationDuration = metrics.NewHistogram(
&metrics.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation_seconds", Name: "scheduling_algorithm_preemption_evaluation_seconds",
@ -168,7 +168,7 @@ var (
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram( DeprecatedSchedulingAlgorithmPreemptionEvaluationDuration = metrics.NewHistogram(
&metrics.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation", Name: "scheduling_algorithm_preemption_evaluation",
@ -231,6 +231,7 @@ var (
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_duration_seconds", Name: "pod_scheduling_duration_seconds",
Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.", Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
// Start with 1ms with the last bucket being [~16s, Inf)
Buckets: metrics.ExponentialBuckets(0.001, 2, 15), Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}) })
@ -249,7 +250,8 @@ var (
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "framework_extension_point_duration_seconds", Name: "framework_extension_point_duration_seconds",
Help: "Latency for running all plugins of a specific extension point.", Help: "Latency for running all plugins of a specific extension point.",
Buckets: nil, // Start with 0.1ms with the last bucket being [~200ms, Inf)
Buckets: metrics.ExponentialBuckets(0.0001, 2, 12),
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
[]string{"extension_point", "status"}) []string{"extension_point", "status"})
@ -259,7 +261,9 @@ var (
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "plugin_execution_duration_seconds", Name: "plugin_execution_duration_seconds",
Help: "Duration for running a plugin at a specific extension point.", Help: "Duration for running a plugin at a specific extension point.",
Buckets: nil, // Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
// so that we have better granularity since plugin latency is very sensitive.
Buckets: metrics.ExponentialBuckets(0.00001, 1.5, 20),
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
[]string{"plugin", "extension_point", "status"}) []string{"plugin", "extension_point", "status"})
@ -304,8 +308,8 @@ var (
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration, DeprecatedSchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration, SchedulingAlgorithmPriorityEvaluationDuration,
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration, DeprecatedSchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration, SchedulingAlgorithmPreemptionEvaluationDuration,
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration, DeprecatedSchedulingAlgorithmPreemptionEvaluationDuration,
PreemptionVictims, PreemptionVictims,
PreemptionAttempts, PreemptionAttempts,
pendingPods, pendingPods,

View File

@ -614,8 +614,8 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
preemptionStartTime := time.Now() preemptionStartTime := time.Now()
sched.preempt(schedulingCycleCtx, state, fwk, pod, fitError) sched.preempt(schedulingCycleCtx, state, fwk, pod, fitError)
metrics.PreemptionAttempts.Inc() metrics.PreemptionAttempts.Inc()
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.SchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) metrics.DeprecatedSchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
} }

View File

@ -48,7 +48,7 @@ var (
// BenchmarkScheduling benchmarks the scheduling rate when the cluster has // BenchmarkScheduling benchmarks the scheduling rate when the cluster has
// various quantities of nodes and scheduled pods. // various quantities of nodes and scheduled pods.
func BenchmarkSchedulingV(b *testing.B) { func BenchmarkScheduling(b *testing.B) {
tests := []struct{ nodes, existingPods, minPods int }{ tests := []struct{ nodes, existingPods, minPods int }{
{nodes: 100, existingPods: 0, minPods: 100}, {nodes: 100, existingPods: 0, minPods: 100},
{nodes: 100, existingPods: 1000, minPods: 100}, {nodes: 100, existingPods: 1000, minPods: 100},