diff --git a/pkg/scheduler/backend/cache/cache_test.go b/pkg/scheduler/backend/cache/cache_test.go index 61fe56000eb..b28579f65dc 100644 --- a/pkg/scheduler/backend/cache/cache_test.go +++ b/pkg/scheduler/backend/cache/cache_test.go @@ -33,6 +33,7 @@ import ( "k8s.io/klog/v2" "k8s.io/klog/v2/ktesting" "k8s.io/kubernetes/pkg/scheduler/framework" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" schedutil "k8s.io/kubernetes/pkg/scheduler/util" ) @@ -264,6 +265,7 @@ func assumeAndFinishBinding(logger klog.Logger, cache *cacheImpl, pod *v1.Pod, a // TestExpirePod tests that assumed pods will be removed if expired. // The removal will be reflected in node info. func TestExpirePod(t *testing.T) { + metrics.Register() nodeName := "node" testPods := []*v1.Pod{ makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}), diff --git a/pkg/scheduler/backend/queue/active_queue_test.go b/pkg/scheduler/backend/queue/active_queue_test.go index c22abe67294..cdb0aa0af3e 100644 --- a/pkg/scheduler/backend/queue/active_queue_test.go +++ b/pkg/scheduler/backend/queue/active_queue_test.go @@ -29,6 +29,7 @@ import ( func TestClose(t *testing.T) { logger, ctx := ktesting.NewTestContext(t) + metrics.Register() rr := metrics.NewMetricsAsyncRecorder(10, time.Second, ctx.Done()) aq := newActiveQueue(heap.NewWithRecorder(podInfoKeyFunc, heap.LessFunc[*framework.QueuedPodInfo](newDefaultQueueSort()), metrics.NewActivePodsRecorder()), true, *rr) diff --git a/pkg/scheduler/backend/queue/scheduling_queue_test.go b/pkg/scheduler/backend/queue/scheduling_queue_test.go index 5ea16250197..5450526ebf1 100644 --- a/pkg/scheduler/backend/queue/scheduling_queue_test.go +++ b/pkg/scheduler/backend/queue/scheduling_queue_test.go @@ -120,6 +120,7 @@ func TestPriorityQueue_Add(t *testing.T) { logger, ctx := ktesting.NewTestContext(t) ctx, cancel := context.WithCancel(ctx) defer cancel() + metrics.Register() q := NewTestQueueWithObjects(ctx, newDefaultQueueSort(), objs) q.Add(logger, medPriorityPodInfo.Pod) q.Add(logger, unschedulablePodInfo.Pod) @@ -2926,7 +2927,6 @@ func TestPodTimestamp(t *testing.T) { // TestPendingPodsMetric tests Prometheus metrics related with pending pods func TestPendingPodsMetric(t *testing.T) { timestamp := time.Now() - metrics.Register() total := 60 queueableNum := 50 queueable, failme := "queueable", "failme" @@ -2951,6 +2951,7 @@ func TestPendingPodsMetric(t *testing.T) { pInfosWithDelay[i].Attempts = 0 } } + metrics.Register() tests := []struct { name string @@ -3185,11 +3186,11 @@ scheduler_plugin_execution_duration_seconds_count{extension_point="PreEnqueue",p for _, test := range tests { t.Run(test.name, func(t *testing.T) { - resetMetrics() - resetPodInfos() logger, ctx := ktesting.NewTestContext(t) ctx, cancel := context.WithCancel(ctx) defer cancel() + resetMetrics() + resetPodInfos() m := map[string][]framework.PreEnqueuePlugin{"": {&preEnqueuePlugin{allowlists: []string{queueable}}}} recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done()) @@ -3326,8 +3327,8 @@ func TestPerPodSchedulingMetrics(t *testing.T) { func TestIncomingPodsMetrics(t *testing.T) { timestamp := time.Now() unschedulablePlg := "unschedulable_plugin" - metrics.Register() var pInfos = make([]*framework.QueuedPodInfo, 0, 3) + metrics.Register() for i := 1; i <= 3; i++ { p := &framework.QueuedPodInfo{ PodInfo: mustNewTestPodInfo(t, @@ -3399,10 +3400,10 @@ func TestIncomingPodsMetrics(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - metrics.SchedulerQueueIncomingPods.Reset() logger, ctx := ktesting.NewTestContext(t) ctx, cancel := context.WithCancel(ctx) defer cancel() + metrics.SchedulerQueueIncomingPods.Reset() queue := NewTestQueue(ctx, newDefaultQueueSort(), WithClock(testingclock.NewFakeClock(timestamp))) for _, op := range test.operations { for _, pInfo := range pInfos { diff --git a/pkg/scheduler/eventhandlers_test.go b/pkg/scheduler/eventhandlers_test.go index bc2537defd4..52766571dd5 100644 --- a/pkg/scheduler/eventhandlers_test.go +++ b/pkg/scheduler/eventhandlers_test.go @@ -48,6 +48,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" "k8s.io/kubernetes/pkg/scheduler/util/assumecache" ) @@ -55,6 +56,7 @@ import ( func TestUpdatePodInCache(t *testing.T) { ttl := 10 * time.Second nodeName := "node" + metrics.Register() tests := []struct { name string diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go index eed1896ae43..933b24683ee 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go @@ -59,6 +59,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration" "k8s.io/kubernetes/pkg/scheduler/framework/preemption" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" ) @@ -143,6 +144,7 @@ func (pl *TestPlugin) Filter(ctx context.Context, state *framework.CycleState, p } func TestPostFilter(t *testing.T) { + metrics.Register() onePodRes := map[v1.ResourceName]string{v1.ResourcePods: "1"} nodeRes := map[v1.ResourceName]string{v1.ResourceCPU: "200m", v1.ResourceMemory: "400"} tests := []struct { @@ -426,6 +428,7 @@ type candidate struct { } func TestDryRunPreemption(t *testing.T) { + metrics.Register() tests := []struct { name string args *config.DefaultPreemptionArgs diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go b/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go index 8f3d88a2e25..e6c272856b1 100644 --- a/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go +++ b/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/backend/cache" "k8s.io/kubernetes/pkg/scheduler/framework" plugintesting "k8s.io/kubernetes/pkg/scheduler/framework/plugins/testing" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" ) @@ -68,6 +69,7 @@ func TestRequiredAffinitySingleNode(t *testing.T) { } podLabel2 := map[string]string{"security": "S1"} node1 := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node1", Labels: labels1}} + metrics.Register() tests := []struct { pod *v1.Pod diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go index cf7ea3569d9..fd8c5002be4 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go @@ -35,6 +35,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" plugintesting "k8s.io/kubernetes/pkg/scheduler/framework/plugins/testing" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" "k8s.io/utils/ptr" ) @@ -68,6 +69,7 @@ func (p *criticalPaths) sort() { } func TestPreFilterState(t *testing.T) { + metrics.Register() tests := []struct { name string pod *v1.Pod @@ -2388,6 +2390,7 @@ func TestPreFilterStateRemovePod(t *testing.T) { } func BenchmarkFilter(b *testing.B) { + metrics.Register() tests := []struct { name string pod *v1.Pod diff --git a/pkg/scheduler/framework/preemption/preemption_test.go b/pkg/scheduler/framework/preemption/preemption_test.go index f4a0bff99c9..f0189d20952 100644 --- a/pkg/scheduler/framework/preemption/preemption_test.go +++ b/pkg/scheduler/framework/preemption/preemption_test.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" ) @@ -113,6 +114,7 @@ func (pl *FakePreemptionScorePostFilterPlugin) OrderedScoreFuncs(ctx context.Con } func TestDryRunPreemption(t *testing.T) { + metrics.Register() tests := []struct { name string nodes []*v1.Node diff --git a/pkg/scheduler/framework/runtime/framework_test.go b/pkg/scheduler/framework/runtime/framework_test.go index 7b0f3ae63e6..d3f27bdb799 100644 --- a/pkg/scheduler/framework/runtime/framework_test.go +++ b/pkg/scheduler/framework/runtime/framework_test.go @@ -457,6 +457,7 @@ func newFrameworkWithQueueSortAndBind(ctx context.Context, r Registry, profile c } func TestInitFrameworkWithScorePlugins(t *testing.T) { + metrics.Register() tests := []struct { name string plugins *config.Plugins @@ -2900,7 +2901,7 @@ func withMetricsRecorder(recorder *metrics.MetricAsyncRecorder) Option { func TestRecordingMetrics(t *testing.T) { state := &framework.CycleState{} state.SetRecordPluginMetrics(true) - + metrics.Register() tests := []struct { name string action func(ctx context.Context, f framework.Framework) @@ -3027,7 +3028,8 @@ func TestRecordingMetrics(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - metrics.Register() + _, ctx := ktesting.NewTestContext(t) + ctx, cancel := context.WithCancel(ctx) metrics.FrameworkExtensionPointDuration.Reset() metrics.PluginExecutionDuration.Reset() @@ -3050,9 +3052,6 @@ func TestRecordingMetrics(t *testing.T) { PostBind: pluginSet, } - _, ctx := ktesting.NewTestContext(t) - ctx, cancel := context.WithCancel(ctx) - recorder := metrics.NewMetricsAsyncRecorder(100, time.Nanosecond, ctx.Done()) profile := config.KubeSchedulerProfile{ PercentageOfNodesToScore: ptr.To[int32](testPercentageOfNodesToScore), @@ -3086,6 +3085,7 @@ func TestRecordingMetrics(t *testing.T) { } func TestRunBindPlugins(t *testing.T) { + metrics.Register() tests := []struct { name string injects []framework.Code @@ -3154,7 +3154,6 @@ func TestRunBindPlugins(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - metrics.Register() metrics.FrameworkExtensionPointDuration.Reset() metrics.PluginExecutionDuration.Reset() @@ -3203,6 +3202,7 @@ func TestRunBindPlugins(t *testing.T) { } func TestPermitWaitDurationMetric(t *testing.T) { + metrics.Register() tests := []struct { name string inject injectedResult @@ -3221,7 +3221,6 @@ func TestPermitWaitDurationMetric(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { _, ctx := ktesting.NewTestContext(t) - metrics.Register() metrics.PermitWaitDuration.Reset() plugin := &TestPlugin{name: testPlugin, inj: tt.inject} diff --git a/pkg/scheduler/metrics/metric_recorder_test.go b/pkg/scheduler/metrics/metric_recorder_test.go index ad12c032ed1..2734b6873a2 100644 --- a/pkg/scheduler/metrics/metric_recorder_test.go +++ b/pkg/scheduler/metrics/metric_recorder_test.go @@ -108,6 +108,7 @@ func TestClear(t *testing.T) { } func TestInFlightEventAsync(t *testing.T) { + Register() r := &MetricAsyncRecorder{ aggregatedInflightEventMetric: map[gaugeVecMetricKey]int{}, aggregatedInflightEventMetricLastFlushTime: time.Now(), diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index a2fd58ba801..84a1bb9e7e5 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -87,6 +87,51 @@ const ( // All the histogram based metrics have 1ms as size for the smallest bucket. var ( + scheduleAttempts *metrics.CounterVec + EventHandlingLatency *metrics.HistogramVec + schedulingLatency *metrics.HistogramVec + SchedulingAlgorithmLatency *metrics.Histogram + PreemptionVictims *metrics.Histogram + PreemptionAttempts *metrics.Counter + pendingPods *metrics.GaugeVec + InFlightEvents *metrics.GaugeVec + Goroutines *metrics.GaugeVec + + // PodSchedulingDuration is deprecated as of Kubernetes v1.28, and will be removed + // in v1.31. Please use PodSchedulingSLIDuration instead. + PodSchedulingDuration *metrics.HistogramVec + PodSchedulingSLIDuration *metrics.HistogramVec + PodSchedulingAttempts *metrics.Histogram + FrameworkExtensionPointDuration *metrics.HistogramVec + PluginExecutionDuration *metrics.HistogramVec + + // This is only available when the QHint feature gate is enabled. + queueingHintExecutionDuration *metrics.HistogramVec + SchedulerQueueIncomingPods *metrics.CounterVec + PermitWaitDuration *metrics.HistogramVec + CacheSize *metrics.GaugeVec + unschedulableReasons *metrics.GaugeVec + PluginEvaluationTotal *metrics.CounterVec + metricsList []metrics.Registerable +) + +var registerMetrics sync.Once + +// Register all metrics. +func Register() { + // Register the metrics. + registerMetrics.Do(func() { + InitMetrics() + RegisterMetrics(metricsList...) + if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) { + RegisterMetrics(queueingHintExecutionDuration) + RegisterMetrics(InFlightEvents) + } + volumebindingmetrics.RegisterVolumeSchedulingMetrics() + }) +} + +func InitMetrics() { scheduleAttempts = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: SchedulerSubsystem, @@ -292,21 +337,6 @@ var ( unschedulableReasons, PluginEvaluationTotal, } -) - -var registerMetrics sync.Once - -// Register all metrics. -func Register() { - // Register the metrics. - registerMetrics.Do(func() { - RegisterMetrics(metricsList...) - if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) { - RegisterMetrics(queueingHintExecutionDuration) - RegisterMetrics(InFlightEvents) - } - volumebindingmetrics.RegisterVolumeSchedulingMetrics() - }) } // RegisterMetrics registers a list of metrics.