mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
migrate scheduler metrics endpoint to metrics stability framework
This commit is contained in:
parent
f0be447922
commit
8da448dbe3
@ -20,10 +20,9 @@ go_library(
|
|||||||
importpath = "k8s.io/kubernetes/cmd/kube-scheduler",
|
importpath = "k8s.io/kubernetes/cmd/kube-scheduler",
|
||||||
deps = [
|
deps = [
|
||||||
"//cmd/kube-scheduler/app:go_default_library",
|
"//cmd/kube-scheduler/app:go_default_library",
|
||||||
"//pkg/util/prometheusclientgo:go_default_library",
|
|
||||||
"//pkg/version/prometheus:go_default_library",
|
|
||||||
"//staging/src/k8s.io/component-base/cli/flag:go_default_library",
|
"//staging/src/k8s.io/component-base/cli/flag:go_default_library",
|
||||||
"//staging/src/k8s.io/component-base/logs:go_default_library",
|
"//staging/src/k8s.io/component-base/logs:go_default_library",
|
||||||
|
"//staging/src/k8s.io/component-base/metrics/prometheus/clientgo:go_default_library",
|
||||||
"//vendor/github.com/spf13/pflag:go_default_library",
|
"//vendor/github.com/spf13/pflag:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -36,7 +36,7 @@ go_library(
|
|||||||
"//staging/src/k8s.io/client-go/tools/leaderelection:go_default_library",
|
"//staging/src/k8s.io/client-go/tools/leaderelection:go_default_library",
|
||||||
"//staging/src/k8s.io/component-base/cli/flag:go_default_library",
|
"//staging/src/k8s.io/component-base/cli/flag:go_default_library",
|
||||||
"//staging/src/k8s.io/component-base/cli/globalflag:go_default_library",
|
"//staging/src/k8s.io/component-base/cli/globalflag:go_default_library",
|
||||||
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
|
||||||
"//vendor/github.com/spf13/cobra:go_default_library",
|
"//vendor/github.com/spf13/cobra:go_default_library",
|
||||||
"//vendor/k8s.io/klog:go_default_library",
|
"//vendor/k8s.io/klog:go_default_library",
|
||||||
],
|
],
|
||||||
|
@ -25,6 +25,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
goruntime "runtime"
|
goruntime "runtime"
|
||||||
|
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
|
||||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||||
"k8s.io/apiserver/pkg/authentication/authenticator"
|
"k8s.io/apiserver/pkg/authentication/authenticator"
|
||||||
"k8s.io/apiserver/pkg/authorization/authorizer"
|
"k8s.io/apiserver/pkg/authorization/authorizer"
|
||||||
@ -39,6 +41,8 @@ import (
|
|||||||
"k8s.io/client-go/tools/leaderelection"
|
"k8s.io/client-go/tools/leaderelection"
|
||||||
cliflag "k8s.io/component-base/cli/flag"
|
cliflag "k8s.io/component-base/cli/flag"
|
||||||
"k8s.io/component-base/cli/globalflag"
|
"k8s.io/component-base/cli/globalflag"
|
||||||
|
"k8s.io/component-base/metrics/legacyregistry"
|
||||||
|
"k8s.io/klog"
|
||||||
schedulerserverconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config"
|
schedulerserverconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config"
|
||||||
"k8s.io/kubernetes/cmd/kube-scheduler/app/options"
|
"k8s.io/kubernetes/cmd/kube-scheduler/app/options"
|
||||||
"k8s.io/kubernetes/pkg/api/legacyscheme"
|
"k8s.io/kubernetes/pkg/api/legacyscheme"
|
||||||
@ -51,10 +55,6 @@ import (
|
|||||||
utilflag "k8s.io/kubernetes/pkg/util/flag"
|
utilflag "k8s.io/kubernetes/pkg/util/flag"
|
||||||
"k8s.io/kubernetes/pkg/version"
|
"k8s.io/kubernetes/pkg/version"
|
||||||
"k8s.io/kubernetes/pkg/version/verflag"
|
"k8s.io/kubernetes/pkg/version/verflag"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
"github.com/spf13/cobra"
|
|
||||||
"k8s.io/klog"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Option configures a framework.Registry.
|
// Option configures a framework.Registry.
|
||||||
@ -294,7 +294,7 @@ func buildHandlerChain(handler http.Handler, authn authenticator.Request, authz
|
|||||||
|
|
||||||
func installMetricHandler(pathRecorderMux *mux.PathRecorderMux) {
|
func installMetricHandler(pathRecorderMux *mux.PathRecorderMux) {
|
||||||
configz.InstallHandler(pathRecorderMux)
|
configz.InstallHandler(pathRecorderMux)
|
||||||
defaultMetricsHandler := prometheus.Handler().ServeHTTP
|
defaultMetricsHandler := legacyregistry.Handler().ServeHTTP
|
||||||
pathRecorderMux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
|
pathRecorderMux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
|
||||||
if req.Method == "DELETE" {
|
if req.Method == "DELETE" {
|
||||||
metrics.Reset()
|
metrics.Reset()
|
||||||
|
@ -25,9 +25,8 @@ import (
|
|||||||
|
|
||||||
cliflag "k8s.io/component-base/cli/flag"
|
cliflag "k8s.io/component-base/cli/flag"
|
||||||
"k8s.io/component-base/logs"
|
"k8s.io/component-base/logs"
|
||||||
|
_ "k8s.io/component-base/metrics/prometheus/clientgo"
|
||||||
"k8s.io/kubernetes/cmd/kube-scheduler/app"
|
"k8s.io/kubernetes/cmd/kube-scheduler/app"
|
||||||
_ "k8s.io/kubernetes/pkg/util/prometheusclientgo" // load all the prometheus client-go plugins
|
|
||||||
_ "k8s.io/kubernetes/pkg/version/prometheus" // for version metric registration
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -26,6 +26,8 @@ go_library(
|
|||||||
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
|
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
|
||||||
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
|
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
|
||||||
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
|
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
|
||||||
|
"//staging/src/k8s.io/component-base/metrics:go_default_library",
|
||||||
|
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
|
||||||
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
||||||
"//vendor/k8s.io/klog:go_default_library",
|
"//vendor/k8s.io/klog:go_default_library",
|
||||||
],
|
],
|
||||||
|
@ -18,6 +18,9 @@ package scheduling
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
|
||||||
|
"k8s.io/component-base/metrics"
|
||||||
|
"k8s.io/component-base/metrics/legacyregistry"
|
||||||
)
|
)
|
||||||
|
|
||||||
// VolumeSchedulerSubsystem - subsystem name used by scheduler
|
// VolumeSchedulerSubsystem - subsystem name used by scheduler
|
||||||
@ -25,30 +28,33 @@ const VolumeSchedulerSubsystem = "scheduler_volume"
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
// VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations.
|
// VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations.
|
||||||
VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
|
VolumeBindingRequestSchedulerBinderCache = metrics.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: VolumeSchedulerSubsystem,
|
Subsystem: VolumeSchedulerSubsystem,
|
||||||
Name: "binder_cache_requests_total",
|
Name: "binder_cache_requests_total",
|
||||||
Help: "Total number for request volume binding cache",
|
Help: "Total number for request volume binding cache",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{"operation"},
|
[]string{"operation"},
|
||||||
)
|
)
|
||||||
// VolumeSchedulingStageLatency tracks the latency of volume scheduling operations.
|
// VolumeSchedulingStageLatency tracks the latency of volume scheduling operations.
|
||||||
VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
|
VolumeSchedulingStageLatency = metrics.NewHistogramVec(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: VolumeSchedulerSubsystem,
|
Subsystem: VolumeSchedulerSubsystem,
|
||||||
Name: "scheduling_duration_seconds",
|
Name: "scheduling_duration_seconds",
|
||||||
Help: "Volume scheduling stage latency",
|
Help: "Volume scheduling stage latency",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{"operation"},
|
[]string{"operation"},
|
||||||
)
|
)
|
||||||
// VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations.
|
// VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations.
|
||||||
VolumeSchedulingStageFailed = prometheus.NewCounterVec(
|
VolumeSchedulingStageFailed = metrics.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: VolumeSchedulerSubsystem,
|
Subsystem: VolumeSchedulerSubsystem,
|
||||||
Name: "scheduling_stage_error_total",
|
Name: "scheduling_stage_error_total",
|
||||||
Help: "Volume scheduling stage error count",
|
Help: "Volume scheduling stage error count",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{"operation"},
|
[]string{"operation"},
|
||||||
)
|
)
|
||||||
@ -57,7 +63,7 @@ var (
|
|||||||
// RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library
|
// RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library
|
||||||
// used by scheduler process.
|
// used by scheduler process.
|
||||||
func RegisterVolumeSchedulingMetrics() {
|
func RegisterVolumeSchedulingMetrics() {
|
||||||
prometheus.MustRegister(VolumeBindingRequestSchedulerBinderCache)
|
legacyregistry.MustRegister(VolumeBindingRequestSchedulerBinderCache)
|
||||||
prometheus.MustRegister(VolumeSchedulingStageLatency)
|
legacyregistry.MustRegister(VolumeSchedulingStageLatency)
|
||||||
prometheus.MustRegister(VolumeSchedulingStageFailed)
|
legacyregistry.MustRegister(VolumeSchedulingStageFailed)
|
||||||
}
|
}
|
||||||
|
@ -1217,6 +1217,7 @@ func TestPodTimestamp(t *testing.T) {
|
|||||||
func TestPendingPodsMetric(t *testing.T) {
|
func TestPendingPodsMetric(t *testing.T) {
|
||||||
total := 50
|
total := 50
|
||||||
timestamp := time.Now()
|
timestamp := time.Now()
|
||||||
|
metrics.Register()
|
||||||
var pInfos = make([]*framework.PodInfo, 0, total)
|
var pInfos = make([]*framework.PodInfo, 0, total)
|
||||||
for i := 1; i <= total; i++ {
|
for i := 1; i <= total; i++ {
|
||||||
p := &framework.PodInfo{
|
p := &framework.PodInfo{
|
||||||
@ -1312,9 +1313,9 @@ func TestPendingPodsMetric(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
resetMetrics := func() {
|
resetMetrics := func() {
|
||||||
metrics.ActivePods.Set(0)
|
metrics.ActivePods().Set(0)
|
||||||
metrics.BackoffPods.Set(0)
|
metrics.BackoffPods().Set(0)
|
||||||
metrics.UnschedulablePods.Set(0)
|
metrics.UnschedulablePods().Set(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
@ -1329,7 +1330,7 @@ func TestPendingPodsMetric(t *testing.T) {
|
|||||||
|
|
||||||
var activeNum, backoffNum, unschedulableNum float64
|
var activeNum, backoffNum, unschedulableNum float64
|
||||||
metricProto := &dto.Metric{}
|
metricProto := &dto.Metric{}
|
||||||
if err := metrics.ActivePods.Write(metricProto); err != nil {
|
if err := metrics.ActivePods().Write(metricProto); err != nil {
|
||||||
t.Errorf("error writing ActivePods metric: %v", err)
|
t.Errorf("error writing ActivePods metric: %v", err)
|
||||||
}
|
}
|
||||||
activeNum = metricProto.Gauge.GetValue()
|
activeNum = metricProto.Gauge.GetValue()
|
||||||
@ -1337,7 +1338,7 @@ func TestPendingPodsMetric(t *testing.T) {
|
|||||||
t.Errorf("ActivePods: Expected %v, got %v", test.expected[0], activeNum)
|
t.Errorf("ActivePods: Expected %v, got %v", test.expected[0], activeNum)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metrics.BackoffPods.Write(metricProto); err != nil {
|
if err := metrics.BackoffPods().Write(metricProto); err != nil {
|
||||||
t.Errorf("error writing BackoffPods metric: %v", err)
|
t.Errorf("error writing BackoffPods metric: %v", err)
|
||||||
}
|
}
|
||||||
backoffNum = metricProto.Gauge.GetValue()
|
backoffNum = metricProto.Gauge.GetValue()
|
||||||
@ -1345,7 +1346,7 @@ func TestPendingPodsMetric(t *testing.T) {
|
|||||||
t.Errorf("BackoffPods: Expected %v, got %v", test.expected[1], backoffNum)
|
t.Errorf("BackoffPods: Expected %v, got %v", test.expected[1], backoffNum)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metrics.UnschedulablePods.Write(metricProto); err != nil {
|
if err := metrics.UnschedulablePods().Write(metricProto); err != nil {
|
||||||
t.Errorf("error writing UnschedulablePods metric: %v", err)
|
t.Errorf("error writing UnschedulablePods metric: %v", err)
|
||||||
}
|
}
|
||||||
unschedulableNum = metricProto.Gauge.GetValue()
|
unschedulableNum = metricProto.Gauge.GetValue()
|
||||||
|
@ -11,6 +11,8 @@ go_library(
|
|||||||
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
|
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
|
||||||
deps = [
|
deps = [
|
||||||
"//pkg/controller/volume/scheduling:go_default_library",
|
"//pkg/controller/volume/scheduling:go_default_library",
|
||||||
|
"//staging/src/k8s.io/component-base/metrics:go_default_library",
|
||||||
|
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
|
||||||
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -17,7 +17,7 @@ limitations under the License.
|
|||||||
package metrics
|
package metrics
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"k8s.io/component-base/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
// MetricRecorder represents a metric recorder which takes action when the
|
// MetricRecorder represents a metric recorder which takes action when the
|
||||||
@ -32,27 +32,27 @@ var _ MetricRecorder = &PendingPodsRecorder{}
|
|||||||
|
|
||||||
// PendingPodsRecorder is an implementation of MetricRecorder
|
// PendingPodsRecorder is an implementation of MetricRecorder
|
||||||
type PendingPodsRecorder struct {
|
type PendingPodsRecorder struct {
|
||||||
recorder prometheus.Gauge
|
recorder metrics.GaugeMetric
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewActivePodsRecorder returns ActivePods in a Prometheus metric fashion
|
// NewActivePodsRecorder returns ActivePods in a Prometheus metric fashion
|
||||||
func NewActivePodsRecorder() *PendingPodsRecorder {
|
func NewActivePodsRecorder() *PendingPodsRecorder {
|
||||||
return &PendingPodsRecorder{
|
return &PendingPodsRecorder{
|
||||||
recorder: ActivePods,
|
recorder: ActivePods(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewUnschedulablePodsRecorder returns UnschedulablePods in a Prometheus metric fashion
|
// NewUnschedulablePodsRecorder returns UnschedulablePods in a Prometheus metric fashion
|
||||||
func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
|
func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
|
||||||
return &PendingPodsRecorder{
|
return &PendingPodsRecorder{
|
||||||
recorder: UnschedulablePods,
|
recorder: UnschedulablePods(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewBackoffPodsRecorder returns BackoffPods in a Prometheus metric fashion
|
// NewBackoffPodsRecorder returns BackoffPods in a Prometheus metric fashion
|
||||||
func NewBackoffPodsRecorder() *PendingPodsRecorder {
|
func NewBackoffPodsRecorder() *PendingPodsRecorder {
|
||||||
return &PendingPodsRecorder{
|
return &PendingPodsRecorder{
|
||||||
recorder: BackoffPods,
|
recorder: BackoffPods(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,6 +21,9 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
|
||||||
|
"k8s.io/component-base/metrics"
|
||||||
|
"k8s.io/component-base/metrics/legacyregistry"
|
||||||
volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling"
|
volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -49,11 +52,12 @@ const (
|
|||||||
|
|
||||||
// All the histogram based metrics have 1ms as size for the smallest bucket.
|
// All the histogram based metrics have 1ms as size for the smallest bucket.
|
||||||
var (
|
var (
|
||||||
scheduleAttempts = prometheus.NewCounterVec(
|
scheduleAttempts = metrics.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "schedule_attempts_total",
|
Name: "schedule_attempts_total",
|
||||||
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
|
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
}, []string{"result"})
|
}, []string{"result"})
|
||||||
// PodScheduleSuccesses counts how many pods were scheduled.
|
// PodScheduleSuccesses counts how many pods were scheduled.
|
||||||
PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
|
PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
|
||||||
@ -61,148 +65,162 @@ var (
|
|||||||
PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
|
PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
|
||||||
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
|
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
|
||||||
PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"})
|
PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"})
|
||||||
SchedulingLatency = prometheus.NewSummaryVec(
|
SchedulingLatency = metrics.NewSummaryVec(
|
||||||
prometheus.SummaryOpts{
|
&metrics.SummaryOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: SchedulingLatencyName,
|
Name: SchedulingLatencyName,
|
||||||
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
|
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
|
||||||
// Make the sliding window of 5h.
|
// Make the sliding window of 5h.
|
||||||
// TODO: The value for this should be based on some SLI definition (long term).
|
// TODO: The value for this should be based on some SLI definition (long term).
|
||||||
MaxAge: 5 * time.Hour,
|
MaxAge: 5 * time.Hour,
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{OperationLabel},
|
[]string{OperationLabel},
|
||||||
)
|
)
|
||||||
DeprecatedSchedulingLatency = prometheus.NewSummaryVec(
|
DeprecatedSchedulingLatency = metrics.NewSummaryVec(
|
||||||
prometheus.SummaryOpts{
|
&metrics.SummaryOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: DeprecatedSchedulingLatencyName,
|
Name: DeprecatedSchedulingLatencyName,
|
||||||
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
|
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
|
||||||
// Make the sliding window of 5h.
|
// Make the sliding window of 5h.
|
||||||
// TODO: The value for this should be based on some SLI definition (long term).
|
// TODO: The value for this should be based on some SLI definition (long term).
|
||||||
MaxAge: 5 * time.Hour,
|
MaxAge: 5 * time.Hour,
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{OperationLabel},
|
[]string{OperationLabel},
|
||||||
)
|
)
|
||||||
E2eSchedulingLatency = prometheus.NewHistogram(
|
E2eSchedulingLatency = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "e2e_scheduling_duration_seconds",
|
Name: "e2e_scheduling_duration_seconds",
|
||||||
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
|
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
DeprecatedE2eSchedulingLatency = prometheus.NewHistogram(
|
DeprecatedE2eSchedulingLatency = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "e2e_scheduling_latency_microseconds",
|
Name: "e2e_scheduling_latency_microseconds",
|
||||||
Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)",
|
Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
SchedulingAlgorithmLatency = prometheus.NewHistogram(
|
SchedulingAlgorithmLatency = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_duration_seconds",
|
Name: "scheduling_algorithm_duration_seconds",
|
||||||
Help: "Scheduling algorithm latency in seconds",
|
Help: "Scheduling algorithm latency in seconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
DeprecatedSchedulingAlgorithmLatency = prometheus.NewHistogram(
|
DeprecatedSchedulingAlgorithmLatency = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_latency_microseconds",
|
Name: "scheduling_algorithm_latency_microseconds",
|
||||||
Help: "(Deprecated) Scheduling algorithm latency in microseconds",
|
Help: "(Deprecated) Scheduling algorithm latency in microseconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
|
SchedulingAlgorithmPredicateEvaluationDuration = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_predicate_evaluation_seconds",
|
Name: "scheduling_algorithm_predicate_evaluation_seconds",
|
||||||
Help: "Scheduling algorithm predicate evaluation duration in seconds",
|
Help: "Scheduling algorithm predicate evaluation duration in seconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
|
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_predicate_evaluation",
|
Name: "scheduling_algorithm_predicate_evaluation",
|
||||||
Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds",
|
Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
|
SchedulingAlgorithmPriorityEvaluationDuration = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_priority_evaluation_seconds",
|
Name: "scheduling_algorithm_priority_evaluation_seconds",
|
||||||
Help: "Scheduling algorithm priority evaluation duration in seconds",
|
Help: "Scheduling algorithm priority evaluation duration in seconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
|
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_priority_evaluation",
|
Name: "scheduling_algorithm_priority_evaluation",
|
||||||
Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds",
|
Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
|
SchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_preemption_evaluation_seconds",
|
Name: "scheduling_algorithm_preemption_evaluation_seconds",
|
||||||
Help: "Scheduling algorithm preemption evaluation duration in seconds",
|
Help: "Scheduling algorithm preemption evaluation duration in seconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
|
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_preemption_evaluation",
|
Name: "scheduling_algorithm_preemption_evaluation",
|
||||||
Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds",
|
Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
BindingLatency = prometheus.NewHistogram(
|
BindingLatency = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "binding_duration_seconds",
|
Name: "binding_duration_seconds",
|
||||||
Help: "Binding latency in seconds",
|
Help: "Binding latency in seconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
DeprecatedBindingLatency = prometheus.NewHistogram(
|
DeprecatedBindingLatency = metrics.NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
&metrics.HistogramOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "binding_latency_microseconds",
|
Name: "binding_latency_microseconds",
|
||||||
Help: "(Deprecated) Binding latency in microseconds",
|
Help: "(Deprecated) Binding latency in microseconds",
|
||||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
PreemptionVictims = prometheus.NewGauge(
|
PreemptionVictims = metrics.NewGauge(
|
||||||
prometheus.GaugeOpts{
|
&metrics.GaugeOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "pod_preemption_victims",
|
Name: "pod_preemption_victims",
|
||||||
Help: "Number of selected preemption victims",
|
Help: "Number of selected preemption victims",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
})
|
})
|
||||||
PreemptionAttempts = prometheus.NewCounter(
|
PreemptionAttempts = metrics.NewCounter(
|
||||||
prometheus.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "total_preemption_attempts",
|
Name: "total_preemption_attempts",
|
||||||
Help: "Total preemption attempts in the cluster till now",
|
Help: "Total preemption attempts in the cluster till now",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
})
|
})
|
||||||
|
|
||||||
pendingPods = prometheus.NewGaugeVec(
|
pendingPods = metrics.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
&metrics.GaugeOpts{
|
||||||
Subsystem: SchedulerSubsystem,
|
Subsystem: SchedulerSubsystem,
|
||||||
Name: "pending_pods",
|
Name: "pending_pods",
|
||||||
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
|
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
}, []string{"queue"})
|
}, []string{"queue"})
|
||||||
ActivePods = pendingPods.With(prometheus.Labels{"queue": "active"})
|
|
||||||
BackoffPods = pendingPods.With(prometheus.Labels{"queue": "backoff"})
|
|
||||||
UnschedulablePods = pendingPods.With(prometheus.Labels{"queue": "unschedulable"})
|
|
||||||
|
|
||||||
metricsList = []prometheus.Collector{
|
metricsList = []metrics.Registerable{
|
||||||
scheduleAttempts,
|
scheduleAttempts,
|
||||||
SchedulingLatency,
|
SchedulingLatency,
|
||||||
DeprecatedSchedulingLatency,
|
DeprecatedSchedulingLatency,
|
||||||
@ -231,13 +249,27 @@ func Register() {
|
|||||||
// Register the metrics.
|
// Register the metrics.
|
||||||
registerMetrics.Do(func() {
|
registerMetrics.Do(func() {
|
||||||
for _, metric := range metricsList {
|
for _, metric := range metricsList {
|
||||||
prometheus.MustRegister(metric)
|
legacyregistry.MustRegister(metric)
|
||||||
}
|
}
|
||||||
|
|
||||||
volumescheduling.RegisterVolumeSchedulingMetrics()
|
volumescheduling.RegisterVolumeSchedulingMetrics()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ActivePods returns the pending pods metrics with the label active
|
||||||
|
func ActivePods() metrics.GaugeMetric {
|
||||||
|
return pendingPods.With(prometheus.Labels{"queue": "active"})
|
||||||
|
}
|
||||||
|
|
||||||
|
// BackoffPods returns the pending pods metrics with the label backoff
|
||||||
|
func BackoffPods() metrics.GaugeMetric {
|
||||||
|
return pendingPods.With(prometheus.Labels{"queue": "backoff"})
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnschedulablePods returns the pending pods metrics with the label unschedulable
|
||||||
|
func UnschedulablePods() metrics.GaugeMetric {
|
||||||
|
return pendingPods.With(prometheus.Labels{"queue": "unschedulable"})
|
||||||
|
}
|
||||||
|
|
||||||
// Reset resets metrics
|
// Reset resets metrics
|
||||||
func Reset() {
|
func Reset() {
|
||||||
SchedulingLatency.Reset()
|
SchedulingLatency.Reset()
|
||||||
|
@ -61,6 +61,8 @@ type GaugeMetric interface {
|
|||||||
Set(float64)
|
Set(float64)
|
||||||
Inc()
|
Inc()
|
||||||
Dec()
|
Dec()
|
||||||
|
Add(float64)
|
||||||
|
Write(out *dto.Metric) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// ObserverMetric captures individual observations.
|
// ObserverMetric captures individual observations.
|
||||||
|
Loading…
Reference in New Issue
Block a user