Readding summary metrics

This commit is contained in:
Krzysztof Siedlecki
2018-06-07 13:20:26 +02:00
parent 0547bbf744
commit e32910a544
7 changed files with 128 additions and 51 deletions

View File

@@ -23,13 +23,37 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const schedulerSubsystem = "scheduler"
const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
// Binding - binding operation label value
Binding = "binding"
// SelectingNode - selecting node operation label value
SelectingNode = "selecting_node"
// E2eScheduling - e2e scheduling operation label value
)
// All the histogram based metrics have 1ms as size for the smallest bucket.
var (
SchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: SchedulingLatencyName,
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@@ -37,7 +61,7 @@ var (
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@@ -45,7 +69,7 @@ var (
)
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation",
Help: "Scheduling algorithm predicate evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@@ -53,7 +77,7 @@ var (
)
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation",
Help: "Scheduling algorithm priority evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@@ -61,7 +85,7 @@ var (
)
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation",
Help: "Scheduling algorithm preemption evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@@ -69,7 +93,7 @@ var (
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "Binding latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@@ -77,16 +101,27 @@ var (
)
PreemptionVictims = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "pod_preemption_victims",
Help: "Number of selected preemption victims",
})
PreemptionAttempts = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
metricsList = []prometheus.Collector{
SchedulingLatency,
E2eSchedulingLatency,
SchedulingAlgorithmLatency,
BindingLatency,
SchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
}
)
var registerMetrics sync.Once
@@ -95,19 +130,23 @@ var registerMetrics sync.Once
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
prometheus.MustRegister(E2eSchedulingLatency)
prometheus.MustRegister(SchedulingAlgorithmLatency)
prometheus.MustRegister(BindingLatency)
prometheus.MustRegister(SchedulingAlgorithmPredicateEvaluationDuration)
prometheus.MustRegister(SchedulingAlgorithmPriorityEvaluationDuration)
prometheus.MustRegister(SchedulingAlgorithmPremptionEvaluationDuration)
prometheus.MustRegister(PreemptionVictims)
prometheus.MustRegister(PreemptionAttempts)
for _, metric := range metricsList {
prometheus.MustRegister(metric)
}
})
}
// Reset resets metrics
func Reset() {
SchedulingLatency.Reset()
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}