From 698eda3079bb5af3c5b617bbd230a27f8a690e63 Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor Date: Wed, 17 Jun 2020 17:49:30 -0400 Subject: [PATCH] Add profile label to scheduler extension point metrics Signed-off-by: Aldo Culquicondor --- pkg/scheduler/core/generic_scheduler.go | 2 +- pkg/scheduler/core/generic_scheduler_test.go | 4 ++- pkg/scheduler/framework/runtime/framework.go | 30 ++++++++++++------- .../framework/runtime/framework_test.go | 23 +++++++------- pkg/scheduler/metrics/metrics.go | 2 +- pkg/scheduler/profile/profile.go | 2 +- 6 files changed, 39 insertions(+), 24 deletions(-) diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index 5a5541423ab..bdddf81fafd 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -500,7 +500,7 @@ func (g *genericScheduler) findNodesThatPassFilters(ctx context.Context, prof *p // We record Filter extension point latency here instead of in framework.go because framework.RunFilterPlugins // function is called for each node, whereas we want to have an overall latency for all nodes per scheduling cycle. // Note that this latency also includes latency for `addNominatedPods`, which calls framework.RunPreFilterAddPod. - metrics.FrameworkExtensionPointDuration.WithLabelValues(runtime.Filter, statusCode.String()).Observe(metrics.SinceInSeconds(beginCheckNode)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(runtime.Filter, statusCode.String(), prof.Name).Observe(metrics.SinceInSeconds(beginCheckNode)) }() // Stops searching for more nodes once the configured number of feasible nodes diff --git a/pkg/scheduler/core/generic_scheduler_test.go b/pkg/scheduler/core/generic_scheduler_test.go index 98922a639e1..d5684c14018 100644 --- a/pkg/scheduler/core/generic_scheduler_test.go +++ b/pkg/scheduler/core/generic_scheduler_test.go @@ -710,7 +710,9 @@ func TestGenericScheduler(t *testing.T) { if err != nil { t.Fatal(err) } - prof := &profile.Profile{Framework: fwk} + prof := &profile.Profile{ + Framework: fwk, + } var pvcs []v1.PersistentVolumeClaim pvcs = append(pvcs, test.pvcs...) diff --git a/pkg/scheduler/framework/runtime/framework.go b/pkg/scheduler/framework/runtime/framework.go index 917455edfca..4ca4229b1e5 100644 --- a/pkg/scheduler/framework/runtime/framework.go +++ b/pkg/scheduler/framework/runtime/framework.go @@ -85,6 +85,7 @@ type frameworkImpl struct { informerFactory informers.SharedInformerFactory metricsRecorder *metricsRecorder + profileName string preemptHandle framework.PreemptHandle @@ -127,6 +128,7 @@ type frameworkOptions struct { informerFactory informers.SharedInformerFactory snapshotSharedLister framework.SharedLister metricsRecorder *metricsRecorder + profileName string podNominator framework.PodNominator extenders []framework.Extender runAllFilters bool @@ -171,6 +173,13 @@ func WithRunAllFilters(runAllFilters bool) Option { } } +// WithProfileName sets the profile name. +func WithProfileName(name string) Option { + return func(o *frameworkOptions) { + o.profileName = name + } +} + // withMetricsRecorder is only used in tests. func withMetricsRecorder(recorder *metricsRecorder) Option { return func(o *frameworkOptions) { @@ -228,6 +237,7 @@ func NewFramework(r Registry, plugins *config.Plugins, args []config.PluginConfi eventRecorder: options.eventRecorder, informerFactory: options.informerFactory, metricsRecorder: options.metricsRecorder, + profileName: options.profileName, runAllFilters: options.runAllFilters, } f.preemptHandle = &preemptHandle{ @@ -381,7 +391,7 @@ func (f *frameworkImpl) QueueSortFunc() framework.LessFunc { func (f *frameworkImpl) RunPreFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(preFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(preFilter, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() for _, pl := range f.preFilterPlugins { status = f.runPreFilterPlugin(ctx, pl, state, pod) @@ -528,7 +538,7 @@ func (f *frameworkImpl) runFilterPlugin(ctx context.Context, pl framework.Filter func (f *frameworkImpl) RunPostFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, filteredNodeStatusMap framework.NodeToStatusMap) (_ *framework.PostFilterResult, status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(postFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(postFilter, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() statuses := make(framework.PluginToStatus) @@ -566,7 +576,7 @@ func (f *frameworkImpl) RunPreScorePlugins( ) (status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(preScore, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(preScore, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() for _, pl := range f.preScorePlugins { status = f.runPreScorePlugin(ctx, pl, state, pod, nodes) @@ -597,7 +607,7 @@ func (f *frameworkImpl) runPreScorePlugin(ctx context.Context, pl framework.PreS func (f *frameworkImpl) RunScorePlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodes []*v1.Node) (ps framework.PluginToNodeScores, status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(score, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(score, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() pluginToNodeScores := make(framework.PluginToNodeScores, len(f.scorePlugins)) for _, pl := range f.scorePlugins { @@ -699,7 +709,7 @@ func (f *frameworkImpl) runScoreExtension(ctx context.Context, pl framework.Scor func (f *frameworkImpl) RunPreBindPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(preBind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(preBind, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() for _, pl := range f.preBindPlugins { status = f.runPreBindPlugin(ctx, pl, state, pod, nodeName) @@ -726,7 +736,7 @@ func (f *frameworkImpl) runPreBindPlugin(ctx context.Context, pl framework.PreBi func (f *frameworkImpl) RunBindPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(bind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(bind, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() if len(f.bindPlugins) == 0 { return framework.NewStatus(framework.Skip, "") @@ -760,7 +770,7 @@ func (f *frameworkImpl) runBindPlugin(ctx context.Context, bp framework.BindPlug func (f *frameworkImpl) RunPostBindPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(postBind, framework.Success.String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(postBind, framework.Success.String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() for _, pl := range f.postBindPlugins { f.runPostBindPlugin(ctx, pl, state, pod, nodeName) @@ -783,7 +793,7 @@ func (f *frameworkImpl) runPostBindPlugin(ctx context.Context, pl framework.Post func (f *frameworkImpl) RunReservePlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(reserve, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(reserve, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() for _, pl := range f.reservePlugins { status = f.runReservePlugin(ctx, pl, state, pod, nodeName) @@ -810,7 +820,7 @@ func (f *frameworkImpl) runReservePlugin(ctx context.Context, pl framework.Reser func (f *frameworkImpl) RunUnreservePlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(unreserve, framework.Success.String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(unreserve, framework.Success.String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() for _, pl := range f.unreservePlugins { f.runUnreservePlugin(ctx, pl, state, pod, nodeName) @@ -836,7 +846,7 @@ func (f *frameworkImpl) runUnreservePlugin(ctx context.Context, pl framework.Unr func (f *frameworkImpl) RunPermitPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) { startTime := time.Now() defer func() { - metrics.FrameworkExtensionPointDuration.WithLabelValues(permit, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + metrics.FrameworkExtensionPointDuration.WithLabelValues(permit, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime)) }() pluginsWaitTime := make(map[string]time.Duration) statusCode := framework.Success diff --git a/pkg/scheduler/framework/runtime/framework_test.go b/pkg/scheduler/framework/runtime/framework_test.go index 91fe8decceb..0ff4a8c3045 100644 --- a/pkg/scheduler/framework/runtime/framework_test.go +++ b/pkg/scheduler/framework/runtime/framework_test.go @@ -48,6 +48,8 @@ const ( testPlugin = "test-plugin" permitPlugin = "permit-plugin" bindPlugin = "bind-plugin" + + testProfileName = "test-profile" ) // TestScoreWithNormalizePlugin implements ScoreWithNormalizePlugin interface. @@ -1719,7 +1721,7 @@ func TestRecordingMetrics(t *testing.T) { Unreserve: pluginSet, } recorder := newMetricsRecorder(100, time.Nanosecond) - f, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder)) + f, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder), WithProfileName(testProfileName)) if err != nil { t.Fatalf("Failed to create framework for testing: %v", err) } @@ -1824,7 +1826,7 @@ func TestRunBindPlugins(t *testing.T) { } plugins := &config.Plugins{Bind: pluginSet} recorder := newMetricsRecorder(100, time.Nanosecond) - fwk, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder)) + fwk, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder), WithProfileName(testProfileName)) if err != nil { t.Fatal(err) } @@ -2073,16 +2075,17 @@ func collectAndCompareFrameworkMetrics(t *testing.T, wantExtensionPoint string, t.Helper() m := collectHistogramMetric(metrics.FrameworkExtensionPointDuration) - if len(m.Label) != 2 { - t.Fatalf("Unexpected number of label pairs, got: %v, want: 2", len(m.Label)) + gotLabels := make(map[string]string, len(m.Label)) + for _, p := range m.Label { + gotLabels[p.GetName()] = p.GetValue() } - - if *m.Label[0].Value != wantExtensionPoint { - t.Errorf("Unexpected extension point label, got: %q, want %q", *m.Label[0].Value, wantExtensionPoint) + wantLabels := map[string]string{ + "extension_point": wantExtensionPoint, + "status": wantStatus.String(), + "profile": testProfileName, } - - if *m.Label[1].Value != wantStatus.String() { - t.Errorf("Unexpected status code label, got: %q, want %q", *m.Label[1].Value, wantStatus) + if diff := cmp.Diff(wantLabels, gotLabels); diff != "" { + t.Errorf("unexpected labels (-want,+got):\n%s", diff) } if *m.Histogram.SampleCount != 1 { diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index 52949c39d8d..2416e830578 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -182,7 +182,7 @@ var ( Buckets: metrics.ExponentialBuckets(0.0001, 2, 12), StabilityLevel: metrics.ALPHA, }, - []string{"extension_point", "status"}) + []string{"extension_point", "status", "profile"}) PluginExecutionDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ diff --git a/pkg/scheduler/profile/profile.go b/pkg/scheduler/profile/profile.go index 6726eaeba70..0dc28324502 100644 --- a/pkg/scheduler/profile/profile.go +++ b/pkg/scheduler/profile/profile.go @@ -47,7 +47,7 @@ type Profile struct { func NewProfile(cfg config.KubeSchedulerProfile, frameworkFact FrameworkFactory, recorderFact RecorderFactory, opts ...frameworkruntime.Option) (*Profile, error) { recorder := recorderFact(cfg.SchedulerName) - opts = append(opts, frameworkruntime.WithEventRecorder(recorder)) + opts = append(opts, frameworkruntime.WithEventRecorder(recorder), frameworkruntime.WithProfileName(cfg.SchedulerName)) fwk, err := frameworkFact(cfg, opts...) if err != nil { return nil, err