feature: support queueing_hint_execution_duration_seconds metric

This commit is contained in:
Kensei Nakada 2024-07-19 23:13:07 +09:00
parent 01eb9f4754
commit 7ef3cf5d07
2 changed files with 44 additions and 0 deletions

View File

@ -497,6 +497,7 @@ func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework
continue
}
start := time.Now()
hint, err := hintfn.QueueingHintFn(logger, pod, oldObj, newObj)
if err != nil {
// If the QueueingHintFn returned an error, we should treat the event as Queue so that we can prevent
@ -509,6 +510,8 @@ func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework
}
hint = framework.Queue
}
metrics.QueueingHintExecutionDuration.WithLabelValues(hintfn.PluginName, event.Label, queueingHintToLabel(hint, err)).Observe(metrics.SinceInSeconds(start))
if hint == framework.QueueSkip {
continue
}
@ -536,6 +539,23 @@ func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework
return queueStrategy
}
// queueingHintToLabel maps the outcome of a QueueingHintFn call to the string
// used as a metric label value.
//
// A non-nil err takes precedence over hint and yields the error label.
// Otherwise the label matching hint is returned. An unrecognized hint yields
// the empty string, which is not expected to happen.
func queueingHintToLabel(hint framework.QueueingHint, err error) string {
	switch {
	case err != nil:
		// The error outcome wins regardless of which hint value accompanies it.
		return metrics.QueueingHintResultError
	case hint == framework.Queue:
		return metrics.QueueingHintResultQueue
	case hint == framework.QueueSkip:
		return metrics.QueueingHintResultQueueSkip
	default:
		// Shouldn't reach here.
		return ""
	}
}
// runPreEnqueuePlugins iterates over the PreEnqueue function of each registered PreEnqueuePlugin.
// It returns true if all PreEnqueue functions run successfully; otherwise it returns false
// upon the first failure.

View File

@ -20,8 +20,10 @@ import (
"sync"
"time"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/kubernetes/pkg/features"
volumebindingmetrics "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics"
)
@ -73,6 +75,12 @@ const (
Permit = "Permit"
)
// Result labels for a queueing hint execution. queueingHintToLabel in the
// scheduling queue returns these, and they are passed as the "hint" label value
// when observing QueueingHintExecutionDuration.
const (
// QueueingHintResultQueue is the label used when the hint is Queue.
QueueingHintResultQueue = "Queue"
// QueueingHintResultQueueSkip is the label used when the hint is QueueSkip.
QueueingHintResultQueueSkip = "QueueSkip"
// QueueingHintResultError is the label used when the queueing hint function returned an error.
QueueingHintResultError = "Error"
)
// All the histogram based metrics have 1ms as size for the smallest bucket.
var (
scheduleAttempts = metrics.NewCounterVec(
@ -198,6 +206,19 @@ var (
},
[]string{"plugin", "extension_point", "status"})
// QueueingHintExecutionDuration is a histogram of the time, in seconds, spent running
// a single queueing hint function. Observations are labeled with the plugin name,
// the event label and the resulting hint (or error).
// This is only available when the QHint feature gate is enabled: Register only
// registers it if features.SchedulerQueueingHints is on.
QueueingHintExecutionDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "queueing_hint_execution_duration_seconds",
Help: "Duration for running a queueing hint from a plugin.",
// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
// so that we have better granularity since plugin latency is very sensitive.
Buckets: metrics.ExponentialBuckets(0.00001, 1.5, 20),
StabilityLevel: metrics.ALPHA,
},
// Label order must match the WithLabelValues call sites: plugin, event, hint.
[]string{"plugin", "event", "hint"})
SchedulerQueueIncomingPods = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: SchedulerSubsystem,
@ -269,6 +290,9 @@ func Register() {
// Register the metrics.
registerMetrics.Do(func() {
RegisterMetrics(metricsList...)
if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
RegisterMetrics(QueueingHintExecutionDuration)
}
volumebindingmetrics.RegisterVolumeSchedulingMetrics()
})
}