Merge pull request #92650 from ahg-g/ahg-attempts

Break down PodSchedulingDuration by number of attempts
This commit is contained in:
Kubernetes Prow Robot 2020-07-02 04:17:15 -07:00 committed by GitHub
commit 15a9430ae5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 4 deletions

View File

@ -154,7 +154,7 @@ var (
StabilityLevel: metrics.ALPHA,
}, []string{"work"})
PodSchedulingDuration = metrics.NewHistogram(
PodSchedulingDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_duration_seconds",
@ -162,7 +162,8 @@ var (
// Start with 1ms with the last bucket being [~16s, Inf)
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
})
},
[]string{"attempts"})
PodSchedulingAttempts = metrics.NewHistogram(
&metrics.HistogramOpts{

View File

@ -612,10 +612,9 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
if klog.V(2).Enabled() {
klog.InfoS("Successfully bound pod to node", "pod", klog.KObj(pod), "node", scheduleResult.SuggestedHost, "evaluatedNodes", scheduleResult.EvaluatedNodes, "feasibleNodes", scheduleResult.FeasibleNodes)
}
metrics.PodScheduled(prof.Name, metrics.SinceInSeconds(start))
metrics.PodSchedulingAttempts.Observe(float64(podInfo.Attempts))
metrics.PodSchedulingDuration.Observe(metrics.SinceInSeconds(podInfo.InitialAttemptTimestamp))
metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(podInfo)).Observe(metrics.SinceInSeconds(podInfo.InitialAttemptTimestamp))
// Run "postbind" plugins.
prof.RunPostBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
@ -623,6 +622,15 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
}()
}
// getAttemptsLabel returns the value of the "attempts" label under which the
// scheduling duration of p is recorded.
//
// The pod scheduling duration is broken down by the number of attempts,
// capped at a limit to avoid ending up with a high-cardinality metric: any
// count at or above the cap is reported as "15+", and lower counts are
// reported as their decimal representation.
func getAttemptsLabel(p *framework.QueuedPodInfo) string {
	if p.Attempts >= 15 {
		return "15+"
	}
	// Use strconv.Itoa for the decimal text. A plain string(int) conversion
	// would yield the UTF-8 encoding of that code point (e.g. "\x03" for 3)
	// rather than "3", producing unreadable label values.
	return strconv.Itoa(p.Attempts)
}
func (sched *Scheduler) profileForPod(pod *v1.Pod) (*profile.Profile, error) {
prof, ok := sched.Profiles[pod.Spec.SchedulerName]
if !ok {