Add profile label to scheduler extension point metrics

Signed-off-by: Aldo Culquicondor <acondor@google.com>
This commit is contained in:
Aldo Culquicondor 2020-06-17 17:49:30 -04:00
parent a463b25c9d
commit 698eda3079
6 changed files with 39 additions and 24 deletions

View File

@ -500,7 +500,7 @@ func (g *genericScheduler) findNodesThatPassFilters(ctx context.Context, prof *p
// We record Filter extension point latency here instead of in framework.go because framework.RunFilterPlugins
// function is called for each node, whereas we want to have an overall latency for all nodes per scheduling cycle.
// Note that this latency also includes latency for `addNominatedPods`, which calls framework.RunPreFilterAddPod.
metrics.FrameworkExtensionPointDuration.WithLabelValues(runtime.Filter, statusCode.String()).Observe(metrics.SinceInSeconds(beginCheckNode))
metrics.FrameworkExtensionPointDuration.WithLabelValues(runtime.Filter, statusCode.String(), prof.Name).Observe(metrics.SinceInSeconds(beginCheckNode))
}()
// Stops searching for more nodes once the configured number of feasible nodes

View File

@ -710,7 +710,9 @@ func TestGenericScheduler(t *testing.T) {
if err != nil {
t.Fatal(err)
}
prof := &profile.Profile{Framework: fwk}
prof := &profile.Profile{
Framework: fwk,
}
var pvcs []v1.PersistentVolumeClaim
pvcs = append(pvcs, test.pvcs...)

View File

@ -85,6 +85,7 @@ type frameworkImpl struct {
informerFactory informers.SharedInformerFactory
metricsRecorder *metricsRecorder
profileName string
preemptHandle framework.PreemptHandle
@ -127,6 +128,7 @@ type frameworkOptions struct {
informerFactory informers.SharedInformerFactory
snapshotSharedLister framework.SharedLister
metricsRecorder *metricsRecorder
profileName string
podNominator framework.PodNominator
extenders []framework.Extender
runAllFilters bool
@ -171,6 +173,13 @@ func WithRunAllFilters(runAllFilters bool) Option {
}
}
// WithProfileName sets the profile name.
func WithProfileName(name string) Option {
return func(o *frameworkOptions) {
o.profileName = name
}
}
// withMetricsRecorder is only used in tests.
func withMetricsRecorder(recorder *metricsRecorder) Option {
return func(o *frameworkOptions) {
@ -228,6 +237,7 @@ func NewFramework(r Registry, plugins *config.Plugins, args []config.PluginConfi
eventRecorder: options.eventRecorder,
informerFactory: options.informerFactory,
metricsRecorder: options.metricsRecorder,
profileName: options.profileName,
runAllFilters: options.runAllFilters,
}
f.preemptHandle = &preemptHandle{
@ -381,7 +391,7 @@ func (f *frameworkImpl) QueueSortFunc() framework.LessFunc {
func (f *frameworkImpl) RunPreFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(preFilter, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preFilterPlugins {
status = f.runPreFilterPlugin(ctx, pl, state, pod)
@ -528,7 +538,7 @@ func (f *frameworkImpl) runFilterPlugin(ctx context.Context, pl framework.Filter
func (f *frameworkImpl) RunPostFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, filteredNodeStatusMap framework.NodeToStatusMap) (_ *framework.PostFilterResult, status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(postFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(postFilter, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
statuses := make(framework.PluginToStatus)
@ -566,7 +576,7 @@ func (f *frameworkImpl) RunPreScorePlugins(
) (status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preScore, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(preScore, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preScorePlugins {
status = f.runPreScorePlugin(ctx, pl, state, pod, nodes)
@ -597,7 +607,7 @@ func (f *frameworkImpl) runPreScorePlugin(ctx context.Context, pl framework.PreS
func (f *frameworkImpl) RunScorePlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodes []*v1.Node) (ps framework.PluginToNodeScores, status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(score, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(score, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
pluginToNodeScores := make(framework.PluginToNodeScores, len(f.scorePlugins))
for _, pl := range f.scorePlugins {
@ -699,7 +709,7 @@ func (f *frameworkImpl) runScoreExtension(ctx context.Context, pl framework.Scor
func (f *frameworkImpl) RunPreBindPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preBind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(preBind, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preBindPlugins {
status = f.runPreBindPlugin(ctx, pl, state, pod, nodeName)
@ -726,7 +736,7 @@ func (f *frameworkImpl) runPreBindPlugin(ctx context.Context, pl framework.PreBi
func (f *frameworkImpl) RunBindPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(bind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(bind, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
if len(f.bindPlugins) == 0 {
return framework.NewStatus(framework.Skip, "")
@ -760,7 +770,7 @@ func (f *frameworkImpl) runBindPlugin(ctx context.Context, bp framework.BindPlug
func (f *frameworkImpl) RunPostBindPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(postBind, framework.Success.String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(postBind, framework.Success.String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.postBindPlugins {
f.runPostBindPlugin(ctx, pl, state, pod, nodeName)
@ -783,7 +793,7 @@ func (f *frameworkImpl) runPostBindPlugin(ctx context.Context, pl framework.Post
func (f *frameworkImpl) RunReservePlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(reserve, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(reserve, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.reservePlugins {
status = f.runReservePlugin(ctx, pl, state, pod, nodeName)
@ -810,7 +820,7 @@ func (f *frameworkImpl) runReservePlugin(ctx context.Context, pl framework.Reser
func (f *frameworkImpl) RunUnreservePlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(unreserve, framework.Success.String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(unreserve, framework.Success.String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.unreservePlugins {
f.runUnreservePlugin(ctx, pl, state, pod, nodeName)
@ -836,7 +846,7 @@ func (f *frameworkImpl) runUnreservePlugin(ctx context.Context, pl framework.Unr
func (f *frameworkImpl) RunPermitPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(permit, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(permit, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
pluginsWaitTime := make(map[string]time.Duration)
statusCode := framework.Success

View File

@ -48,6 +48,8 @@ const (
testPlugin = "test-plugin"
permitPlugin = "permit-plugin"
bindPlugin = "bind-plugin"
testProfileName = "test-profile"
)
// TestScoreWithNormalizePlugin implements ScoreWithNormalizePlugin interface.
@ -1719,7 +1721,7 @@ func TestRecordingMetrics(t *testing.T) {
Unreserve: pluginSet,
}
recorder := newMetricsRecorder(100, time.Nanosecond)
f, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder))
f, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder), WithProfileName(testProfileName))
if err != nil {
t.Fatalf("Failed to create framework for testing: %v", err)
}
@ -1824,7 +1826,7 @@ func TestRunBindPlugins(t *testing.T) {
}
plugins := &config.Plugins{Bind: pluginSet}
recorder := newMetricsRecorder(100, time.Nanosecond)
fwk, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder))
fwk, err := newFrameworkWithQueueSortAndBind(r, plugins, emptyArgs, withMetricsRecorder(recorder), WithProfileName(testProfileName))
if err != nil {
t.Fatal(err)
}
@ -2073,16 +2075,17 @@ func collectAndCompareFrameworkMetrics(t *testing.T, wantExtensionPoint string,
t.Helper()
m := collectHistogramMetric(metrics.FrameworkExtensionPointDuration)
if len(m.Label) != 2 {
t.Fatalf("Unexpected number of label pairs, got: %v, want: 2", len(m.Label))
gotLabels := make(map[string]string, len(m.Label))
for _, p := range m.Label {
gotLabels[p.GetName()] = p.GetValue()
}
if *m.Label[0].Value != wantExtensionPoint {
t.Errorf("Unexpected extension point label, got: %q, want %q", *m.Label[0].Value, wantExtensionPoint)
wantLabels := map[string]string{
"extension_point": wantExtensionPoint,
"status": wantStatus.String(),
"profile": testProfileName,
}
if *m.Label[1].Value != wantStatus.String() {
t.Errorf("Unexpected status code label, got: %q, want %q", *m.Label[1].Value, wantStatus)
if diff := cmp.Diff(wantLabels, gotLabels); diff != "" {
t.Errorf("unexpected labels (-want,+got):\n%s", diff)
}
if *m.Histogram.SampleCount != 1 {

View File

@ -182,7 +182,7 @@ var (
Buckets: metrics.ExponentialBuckets(0.0001, 2, 12),
StabilityLevel: metrics.ALPHA,
},
[]string{"extension_point", "status"})
[]string{"extension_point", "status", "profile"})
PluginExecutionDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{

View File

@ -47,7 +47,7 @@ type Profile struct {
func NewProfile(cfg config.KubeSchedulerProfile, frameworkFact FrameworkFactory, recorderFact RecorderFactory,
opts ...frameworkruntime.Option) (*Profile, error) {
recorder := recorderFact(cfg.SchedulerName)
opts = append(opts, frameworkruntime.WithEventRecorder(recorder))
opts = append(opts, frameworkruntime.WithEventRecorder(recorder), frameworkruntime.WithProfileName(cfg.SchedulerName))
fwk, err := frameworkFact(cfg, opts...)
if err != nil {
return nil, err