migrate scheduler metrics endpoint to metrics stability framework

This commit is contained in:
Han Kang 2019-08-22 18:00:06 -07:00
parent f0be447922
commit 8da448dbe3
11 changed files with 187 additions and 144 deletions

View File

@ -20,10 +20,9 @@ go_library(
importpath = "k8s.io/kubernetes/cmd/kube-scheduler", importpath = "k8s.io/kubernetes/cmd/kube-scheduler",
deps = [ deps = [
"//cmd/kube-scheduler/app:go_default_library", "//cmd/kube-scheduler/app:go_default_library",
"//pkg/util/prometheusclientgo:go_default_library",
"//pkg/version/prometheus:go_default_library",
"//staging/src/k8s.io/component-base/cli/flag:go_default_library", "//staging/src/k8s.io/component-base/cli/flag:go_default_library",
"//staging/src/k8s.io/component-base/logs:go_default_library", "//staging/src/k8s.io/component-base/logs:go_default_library",
"//staging/src/k8s.io/component-base/metrics/prometheus/clientgo:go_default_library",
"//vendor/github.com/spf13/pflag:go_default_library", "//vendor/github.com/spf13/pflag:go_default_library",
], ],
) )

View File

@ -36,7 +36,7 @@ go_library(
"//staging/src/k8s.io/client-go/tools/leaderelection:go_default_library", "//staging/src/k8s.io/client-go/tools/leaderelection:go_default_library",
"//staging/src/k8s.io/component-base/cli/flag:go_default_library", "//staging/src/k8s.io/component-base/cli/flag:go_default_library",
"//staging/src/k8s.io/component-base/cli/globalflag:go_default_library", "//staging/src/k8s.io/component-base/cli/globalflag:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", "//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
"//vendor/github.com/spf13/cobra:go_default_library", "//vendor/github.com/spf13/cobra:go_default_library",
"//vendor/k8s.io/klog:go_default_library", "//vendor/k8s.io/klog:go_default_library",
], ],

View File

@ -25,6 +25,8 @@ import (
"os" "os"
goruntime "runtime" goruntime "runtime"
"github.com/spf13/cobra"
utilerrors "k8s.io/apimachinery/pkg/util/errors" utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apiserver/pkg/authentication/authenticator" "k8s.io/apiserver/pkg/authentication/authenticator"
"k8s.io/apiserver/pkg/authorization/authorizer" "k8s.io/apiserver/pkg/authorization/authorizer"
@ -39,6 +41,8 @@ import (
"k8s.io/client-go/tools/leaderelection" "k8s.io/client-go/tools/leaderelection"
cliflag "k8s.io/component-base/cli/flag" cliflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/cli/globalflag" "k8s.io/component-base/cli/globalflag"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/klog"
schedulerserverconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config" schedulerserverconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config"
"k8s.io/kubernetes/cmd/kube-scheduler/app/options" "k8s.io/kubernetes/cmd/kube-scheduler/app/options"
"k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/api/legacyscheme"
@ -51,10 +55,6 @@ import (
utilflag "k8s.io/kubernetes/pkg/util/flag" utilflag "k8s.io/kubernetes/pkg/util/flag"
"k8s.io/kubernetes/pkg/version" "k8s.io/kubernetes/pkg/version"
"k8s.io/kubernetes/pkg/version/verflag" "k8s.io/kubernetes/pkg/version/verflag"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/cobra"
"k8s.io/klog"
) )
// Option configures a framework.Registry. // Option configures a framework.Registry.
@ -294,7 +294,7 @@ func buildHandlerChain(handler http.Handler, authn authenticator.Request, authz
func installMetricHandler(pathRecorderMux *mux.PathRecorderMux) { func installMetricHandler(pathRecorderMux *mux.PathRecorderMux) {
configz.InstallHandler(pathRecorderMux) configz.InstallHandler(pathRecorderMux)
defaultMetricsHandler := prometheus.Handler().ServeHTTP defaultMetricsHandler := legacyregistry.Handler().ServeHTTP
pathRecorderMux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) { pathRecorderMux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
if req.Method == "DELETE" { if req.Method == "DELETE" {
metrics.Reset() metrics.Reset()

View File

@ -25,9 +25,8 @@ import (
cliflag "k8s.io/component-base/cli/flag" cliflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/logs" "k8s.io/component-base/logs"
_ "k8s.io/component-base/metrics/prometheus/clientgo"
"k8s.io/kubernetes/cmd/kube-scheduler/app" "k8s.io/kubernetes/cmd/kube-scheduler/app"
_ "k8s.io/kubernetes/pkg/util/prometheusclientgo" // load all the prometheus client-go plugins
_ "k8s.io/kubernetes/pkg/version/prometheus" // for version metric registration
) )
func main() { func main() {

View File

@ -26,6 +26,8 @@ go_library(
"//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library", "//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library", "//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/klog:go_default_library", "//vendor/k8s.io/klog:go_default_library",
], ],

View File

@ -18,6 +18,9 @@ package scheduling
import ( import (
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
) )
// VolumeSchedulerSubsystem - subsystem name used by scheduler // VolumeSchedulerSubsystem - subsystem name used by scheduler
@ -25,30 +28,33 @@ const VolumeSchedulerSubsystem = "scheduler_volume"
var ( var (
// VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations. // VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations.
VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec( VolumeBindingRequestSchedulerBinderCache = metrics.NewCounterVec(
prometheus.CounterOpts{ &metrics.CounterOpts{
Subsystem: VolumeSchedulerSubsystem, Subsystem: VolumeSchedulerSubsystem,
Name: "binder_cache_requests_total", Name: "binder_cache_requests_total",
Help: "Total number for request volume binding cache", Help: "Total number for request volume binding cache",
StabilityLevel: metrics.ALPHA,
}, },
[]string{"operation"}, []string{"operation"},
) )
// VolumeSchedulingStageLatency tracks the latency of volume scheduling operations. // VolumeSchedulingStageLatency tracks the latency of volume scheduling operations.
VolumeSchedulingStageLatency = prometheus.NewHistogramVec( VolumeSchedulingStageLatency = metrics.NewHistogramVec(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: VolumeSchedulerSubsystem, Subsystem: VolumeSchedulerSubsystem,
Name: "scheduling_duration_seconds", Name: "scheduling_duration_seconds",
Help: "Volume scheduling stage latency", Help: "Volume scheduling stage latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
[]string{"operation"}, []string{"operation"},
) )
// VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations. // VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations.
VolumeSchedulingStageFailed = prometheus.NewCounterVec( VolumeSchedulingStageFailed = metrics.NewCounterVec(
prometheus.CounterOpts{ &metrics.CounterOpts{
Subsystem: VolumeSchedulerSubsystem, Subsystem: VolumeSchedulerSubsystem,
Name: "scheduling_stage_error_total", Name: "scheduling_stage_error_total",
Help: "Volume scheduling stage error count", Help: "Volume scheduling stage error count",
StabilityLevel: metrics.ALPHA,
}, },
[]string{"operation"}, []string{"operation"},
) )
@ -57,7 +63,7 @@ var (
// RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library // RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library
// used by scheduler process. // used by scheduler process.
func RegisterVolumeSchedulingMetrics() { func RegisterVolumeSchedulingMetrics() {
prometheus.MustRegister(VolumeBindingRequestSchedulerBinderCache) legacyregistry.MustRegister(VolumeBindingRequestSchedulerBinderCache)
prometheus.MustRegister(VolumeSchedulingStageLatency) legacyregistry.MustRegister(VolumeSchedulingStageLatency)
prometheus.MustRegister(VolumeSchedulingStageFailed) legacyregistry.MustRegister(VolumeSchedulingStageFailed)
} }

View File

@ -1217,6 +1217,7 @@ func TestPodTimestamp(t *testing.T) {
func TestPendingPodsMetric(t *testing.T) { func TestPendingPodsMetric(t *testing.T) {
total := 50 total := 50
timestamp := time.Now() timestamp := time.Now()
metrics.Register()
var pInfos = make([]*framework.PodInfo, 0, total) var pInfos = make([]*framework.PodInfo, 0, total)
for i := 1; i <= total; i++ { for i := 1; i <= total; i++ {
p := &framework.PodInfo{ p := &framework.PodInfo{
@ -1312,9 +1313,9 @@ func TestPendingPodsMetric(t *testing.T) {
} }
resetMetrics := func() { resetMetrics := func() {
metrics.ActivePods.Set(0) metrics.ActivePods().Set(0)
metrics.BackoffPods.Set(0) metrics.BackoffPods().Set(0)
metrics.UnschedulablePods.Set(0) metrics.UnschedulablePods().Set(0)
} }
for _, test := range tests { for _, test := range tests {
@ -1329,7 +1330,7 @@ func TestPendingPodsMetric(t *testing.T) {
var activeNum, backoffNum, unschedulableNum float64 var activeNum, backoffNum, unschedulableNum float64
metricProto := &dto.Metric{} metricProto := &dto.Metric{}
if err := metrics.ActivePods.Write(metricProto); err != nil { if err := metrics.ActivePods().Write(metricProto); err != nil {
t.Errorf("error writing ActivePods metric: %v", err) t.Errorf("error writing ActivePods metric: %v", err)
} }
activeNum = metricProto.Gauge.GetValue() activeNum = metricProto.Gauge.GetValue()
@ -1337,7 +1338,7 @@ func TestPendingPodsMetric(t *testing.T) {
t.Errorf("ActivePods: Expected %v, got %v", test.expected[0], activeNum) t.Errorf("ActivePods: Expected %v, got %v", test.expected[0], activeNum)
} }
if err := metrics.BackoffPods.Write(metricProto); err != nil { if err := metrics.BackoffPods().Write(metricProto); err != nil {
t.Errorf("error writing BackoffPods metric: %v", err) t.Errorf("error writing BackoffPods metric: %v", err)
} }
backoffNum = metricProto.Gauge.GetValue() backoffNum = metricProto.Gauge.GetValue()
@ -1345,7 +1346,7 @@ func TestPendingPodsMetric(t *testing.T) {
t.Errorf("BackoffPods: Expected %v, got %v", test.expected[1], backoffNum) t.Errorf("BackoffPods: Expected %v, got %v", test.expected[1], backoffNum)
} }
if err := metrics.UnschedulablePods.Write(metricProto); err != nil { if err := metrics.UnschedulablePods().Write(metricProto); err != nil {
t.Errorf("error writing UnschedulablePods metric: %v", err) t.Errorf("error writing UnschedulablePods metric: %v", err)
} }
unschedulableNum = metricProto.Gauge.GetValue() unschedulableNum = metricProto.Gauge.GetValue()

View File

@ -11,6 +11,8 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics", importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
deps = [ deps = [
"//pkg/controller/volume/scheduling:go_default_library", "//pkg/controller/volume/scheduling:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
], ],
) )

View File

@ -17,7 +17,7 @@ limitations under the License.
package metrics package metrics
import ( import (
"github.com/prometheus/client_golang/prometheus" "k8s.io/component-base/metrics"
) )
// MetricRecorder represents a metric recorder which takes action when the // MetricRecorder represents a metric recorder which takes action when the
@ -32,27 +32,27 @@ var _ MetricRecorder = &PendingPodsRecorder{}
// PendingPodsRecorder is an implementation of MetricRecorder // PendingPodsRecorder is an implementation of MetricRecorder
type PendingPodsRecorder struct { type PendingPodsRecorder struct {
recorder prometheus.Gauge recorder metrics.GaugeMetric
} }
// NewActivePodsRecorder returns ActivePods in a Prometheus metric fashion // NewActivePodsRecorder returns ActivePods in a Prometheus metric fashion
func NewActivePodsRecorder() *PendingPodsRecorder { func NewActivePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{ return &PendingPodsRecorder{
recorder: ActivePods, recorder: ActivePods(),
} }
} }
// NewUnschedulablePodsRecorder returns UnschedulablePods in a Prometheus metric fashion // NewUnschedulablePodsRecorder returns UnschedulablePods in a Prometheus metric fashion
func NewUnschedulablePodsRecorder() *PendingPodsRecorder { func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{ return &PendingPodsRecorder{
recorder: UnschedulablePods, recorder: UnschedulablePods(),
} }
} }
// NewBackoffPodsRecorder returns BackoffPods in a Prometheus metric fashion // NewBackoffPodsRecorder returns BackoffPods in a Prometheus metric fashion
func NewBackoffPodsRecorder() *PendingPodsRecorder { func NewBackoffPodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{ return &PendingPodsRecorder{
recorder: BackoffPods, recorder: BackoffPods(),
} }
} }

View File

@ -21,6 +21,9 @@ import (
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling"
) )
@ -49,11 +52,12 @@ const (
// All the histogram based metrics have 1ms as size for the smallest bucket. // All the histogram based metrics have 1ms as size for the smallest bucket.
var ( var (
scheduleAttempts = prometheus.NewCounterVec( scheduleAttempts = metrics.NewCounterVec(
prometheus.CounterOpts{ &metrics.CounterOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "schedule_attempts_total", Name: "schedule_attempts_total",
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.", Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
StabilityLevel: metrics.ALPHA,
}, []string{"result"}) }, []string{"result"})
// PodScheduleSuccesses counts how many pods were scheduled. // PodScheduleSuccesses counts how many pods were scheduled.
PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"}) PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
@ -61,148 +65,162 @@ var (
PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"}) PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error. // PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"}) PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"})
SchedulingLatency = prometheus.NewSummaryVec( SchedulingLatency = metrics.NewSummaryVec(
prometheus.SummaryOpts{ &metrics.SummaryOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: SchedulingLatencyName, Name: SchedulingLatencyName,
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation", Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h. // Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term). // TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour, MaxAge: 5 * time.Hour,
StabilityLevel: metrics.ALPHA,
}, },
[]string{OperationLabel}, []string{OperationLabel},
) )
DeprecatedSchedulingLatency = prometheus.NewSummaryVec( DeprecatedSchedulingLatency = metrics.NewSummaryVec(
prometheus.SummaryOpts{ &metrics.SummaryOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingLatencyName, Name: DeprecatedSchedulingLatencyName,
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation", Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h. // Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term). // TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour, MaxAge: 5 * time.Hour,
StabilityLevel: metrics.ALPHA,
}, },
[]string{OperationLabel}, []string{OperationLabel},
) )
E2eSchedulingLatency = prometheus.NewHistogram( E2eSchedulingLatency = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_duration_seconds", Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)", Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedE2eSchedulingLatency = prometheus.NewHistogram( DeprecatedE2eSchedulingLatency = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds", Name: "e2e_scheduling_latency_microseconds",
Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)", Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
SchedulingAlgorithmLatency = prometheus.NewHistogram( SchedulingAlgorithmLatency = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_duration_seconds", Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds", Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedSchedulingAlgorithmLatency = prometheus.NewHistogram( DeprecatedSchedulingAlgorithmLatency = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds", Name: "scheduling_algorithm_latency_microseconds",
Help: "(Deprecated) Scheduling algorithm latency in microseconds", Help: "(Deprecated) Scheduling algorithm latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram( SchedulingAlgorithmPredicateEvaluationDuration = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation_seconds", Name: "scheduling_algorithm_predicate_evaluation_seconds",
Help: "Scheduling algorithm predicate evaluation duration in seconds", Help: "Scheduling algorithm predicate evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram( DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation", Name: "scheduling_algorithm_predicate_evaluation",
Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds", Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram( SchedulingAlgorithmPriorityEvaluationDuration = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation_seconds", Name: "scheduling_algorithm_priority_evaluation_seconds",
Help: "Scheduling algorithm priority evaluation duration in seconds", Help: "Scheduling algorithm priority evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram( DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation", Name: "scheduling_algorithm_priority_evaluation",
Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds", Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram( SchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation_seconds", Name: "scheduling_algorithm_preemption_evaluation_seconds",
Help: "Scheduling algorithm preemption evaluation duration in seconds", Help: "Scheduling algorithm preemption evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram( DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation", Name: "scheduling_algorithm_preemption_evaluation",
Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds", Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
BindingLatency = prometheus.NewHistogram( BindingLatency = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "binding_duration_seconds", Name: "binding_duration_seconds",
Help: "Binding latency in seconds", Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
DeprecatedBindingLatency = prometheus.NewHistogram( DeprecatedBindingLatency = metrics.NewHistogram(
prometheus.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds", Name: "binding_latency_microseconds",
Help: "(Deprecated) Binding latency in microseconds", Help: "(Deprecated) Binding latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
}, },
) )
PreemptionVictims = prometheus.NewGauge( PreemptionVictims = metrics.NewGauge(
prometheus.GaugeOpts{ &metrics.GaugeOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "pod_preemption_victims", Name: "pod_preemption_victims",
Help: "Number of selected preemption victims", Help: "Number of selected preemption victims",
StabilityLevel: metrics.ALPHA,
}) })
PreemptionAttempts = prometheus.NewCounter( PreemptionAttempts = metrics.NewCounter(
prometheus.CounterOpts{ &metrics.CounterOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "total_preemption_attempts", Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now", Help: "Total preemption attempts in the cluster till now",
StabilityLevel: metrics.ALPHA,
}) })
pendingPods = prometheus.NewGaugeVec( pendingPods = metrics.NewGaugeVec(
prometheus.GaugeOpts{ &metrics.GaugeOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "pending_pods", Name: "pending_pods",
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.", Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
StabilityLevel: metrics.ALPHA,
}, []string{"queue"}) }, []string{"queue"})
ActivePods = pendingPods.With(prometheus.Labels{"queue": "active"})
BackoffPods = pendingPods.With(prometheus.Labels{"queue": "backoff"})
UnschedulablePods = pendingPods.With(prometheus.Labels{"queue": "unschedulable"})
metricsList = []prometheus.Collector{ metricsList = []metrics.Registerable{
scheduleAttempts, scheduleAttempts,
SchedulingLatency, SchedulingLatency,
DeprecatedSchedulingLatency, DeprecatedSchedulingLatency,
@ -231,13 +249,27 @@ func Register() {
// Register the metrics. // Register the metrics.
registerMetrics.Do(func() { registerMetrics.Do(func() {
for _, metric := range metricsList { for _, metric := range metricsList {
prometheus.MustRegister(metric) legacyregistry.MustRegister(metric)
} }
volumescheduling.RegisterVolumeSchedulingMetrics() volumescheduling.RegisterVolumeSchedulingMetrics()
}) })
} }
// ActivePods looks up the child gauge of the pendingPods gauge vector whose
// "queue" label is "active" and returns it as a metrics.GaugeMetric.
// The lookup is performed on every call rather than cached in a package-level
// variable.
// NOTE(review): presumably pendingPods must already be registered (see
// Register) for the returned gauge to record values — confirm against the
// component-base metrics implementation.
func ActivePods() metrics.GaugeMetric {
	queueLabel := prometheus.Labels{"queue": "active"}
	return pendingPods.With(queueLabel)
}
// BackoffPods looks up the child gauge of the pendingPods gauge vector whose
// "queue" label is "backoff" and returns it as a metrics.GaugeMetric.
// The lookup is performed on every call rather than cached in a package-level
// variable.
// NOTE(review): presumably pendingPods must already be registered (see
// Register) for the returned gauge to record values — confirm against the
// component-base metrics implementation.
func BackoffPods() metrics.GaugeMetric {
	queueLabel := prometheus.Labels{"queue": "backoff"}
	return pendingPods.With(queueLabel)
}
// UnschedulablePods looks up the child gauge of the pendingPods gauge vector
// whose "queue" label is "unschedulable" and returns it as a
// metrics.GaugeMetric. The lookup is performed on every call rather than
// cached in a package-level variable.
// NOTE(review): presumably pendingPods must already be registered (see
// Register) for the returned gauge to record values — confirm against the
// component-base metrics implementation.
func UnschedulablePods() metrics.GaugeMetric {
	queueLabel := prometheus.Labels{"queue": "unschedulable"}
	return pendingPods.With(queueLabel)
}
// Reset resets metrics // Reset resets metrics
func Reset() { func Reset() {
SchedulingLatency.Reset() SchedulingLatency.Reset()

View File

@ -61,6 +61,8 @@ type GaugeMetric interface {
Set(float64) Set(float64)
Inc() Inc()
Dec() Dec()
Add(float64)
Write(out *dto.Metric) error
} }
// ObserverMetric captures individual observations. // ObserverMetric captures individual observations.