Merge pull request #121715 from pohly/scheduler-logging-with-instrumentation

scheduler: fix performance regression at -v3 + contextual logging
This commit is contained in:
Kubernetes Prow Robot 2023-11-03 21:08:25 +01:00 committed by GitHub
commit 263ab254ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 132 additions and 93 deletions

View File

@ -655,14 +655,16 @@ func (f *frameworkImpl) RunPreFilterPlugins(ctx context.Context, state *framewor
var result *framework.PreFilterResult
var pluginsWithNodes []string
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PreFilter")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod))
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PreFilter")
}
for _, pl := range f.preFilterPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
r, s := f.runPreFilterPlugin(ctx, pl, state, pod)
if s.IsSkip() {
skipPlugins.Insert(pl.Name())
@ -711,17 +713,19 @@ func (f *frameworkImpl) RunPreFilterExtensionAddPod(
nodeInfo *framework.NodeInfo,
) (status *framework.Status) {
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PreFilterExtension")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(podToSchedule), "node", klog.KObj(nodeInfo.Node()), "operation", "addPod")
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PreFilterExtension")
}
for _, pl := range f.preFilterPlugins {
if pl.PreFilterExtensions() == nil || state.SkipFilterPlugins.Has(pl.Name()) {
continue
}
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status = f.runPreFilterExtensionAddPod(ctx, pl, state, podToSchedule, podInfoToAdd, nodeInfo)
if !status.IsSuccess() {
err := status.AsError()
@ -754,17 +758,19 @@ func (f *frameworkImpl) RunPreFilterExtensionRemovePod(
nodeInfo *framework.NodeInfo,
) (status *framework.Status) {
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PreFilterExtension")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, klog.KObj(podToSchedule), "node", klog.KObj(nodeInfo.Node()))
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PreFilterExtension")
}
for _, pl := range f.preFilterPlugins {
if pl.PreFilterExtensions() == nil || state.SkipFilterPlugins.Has(pl.Name()) {
continue
}
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status = f.runPreFilterExtensionRemovePod(ctx, pl, state, podToSchedule, podInfoToRemove, nodeInfo)
if !status.IsSuccess() {
err := status.AsError()
@ -797,18 +803,20 @@ func (f *frameworkImpl) RunFilterPlugins(
nodeInfo *framework.NodeInfo,
) *framework.Status {
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "Filter")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.Node()))
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "Filter")
}
for _, pl := range f.filterPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
if state.SkipFilterPlugins.Has(pl.Name()) {
continue
}
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
if status := f.runFilterPlugin(ctx, pl, state, pod, nodeInfo); !status.IsSuccess() {
if !status.IsRejected() {
// Filter plugins are not supposed to return any status other than
@ -842,19 +850,21 @@ func (f *frameworkImpl) RunPostFilterPlugins(ctx context.Context, state *framewo
}()
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PostFilter")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod))
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PostFilter")
}
// `result` records the last meaningful(non-noop) PostFilterResult.
var result *framework.PostFilterResult
var reasons []string
var rejectorPlugin string
for _, pl := range f.postFilterPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
r, s := f.runPostFilterPlugin(ctx, pl, state, pod, filteredNodeStatusMap)
if s.IsSuccess() {
return r, s
@ -990,14 +1000,16 @@ func (f *frameworkImpl) RunPreScorePlugins(
metrics.FrameworkExtensionPointDuration.WithLabelValues(metrics.PreScore, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PreScore")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod))
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PreScore")
}
for _, pl := range f.preScorePlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status = f.runPreScorePlugin(ctx, pl, state, pod, nodes)
if status.IsSkip() {
skipPlugins.Insert(pl.Name())
@ -1046,18 +1058,23 @@ func (f *frameworkImpl) RunScorePlugins(ctx context.Context, state *framework.Cy
if len(plugins) > 0 {
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "Score")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod))
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "Score")
}
// Run Score method for each node in parallel.
f.Parallelizer().Until(ctx, len(nodes), func(index int) {
nodeName := nodes[index].Name
logger := klog.LoggerWithValues(logger, "node", klog.ObjectRef{Name: nodeName})
logger := logger
if verboseLogs {
logger = klog.LoggerWithValues(logger, "node", klog.ObjectRef{Name: nodeName})
}
for _, pl := range plugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
s, status := f.runScorePlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
err := fmt.Errorf("plugin %q failed with: %w", pl.Name(), status.AsError())
@ -1156,14 +1173,17 @@ func (f *frameworkImpl) RunPreBindPlugins(ctx context.Context, state *framework.
metrics.FrameworkExtensionPointDuration.WithLabelValues(metrics.PreBind, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PreBind")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PreBind")
logger = klog.LoggerWithValues(logger, "node", klog.ObjectRef{Name: nodeName})
}
for _, pl := range f.preBindPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status = f.runPreBindPlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
if status.IsRejected() {
@ -1199,14 +1219,16 @@ func (f *frameworkImpl) RunBindPlugins(ctx context.Context, state *framework.Cyc
return framework.NewStatus(framework.Skip, "")
}
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "Bind")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "Bind")
}
for _, pl := range f.bindPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status = f.runBindPlugin(ctx, pl, state, pod, nodeName)
if status.IsSkip() {
continue
@ -1243,14 +1265,16 @@ func (f *frameworkImpl) RunPostBindPlugins(ctx context.Context, state *framework
metrics.FrameworkExtensionPointDuration.WithLabelValues(metrics.PostBind, framework.Success.String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "PostBind")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "PostBind")
}
for _, pl := range f.postBindPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
f.runPostBindPlugin(ctx, pl, state, pod, nodeName)
}
}
@ -1276,14 +1300,17 @@ func (f *frameworkImpl) RunReservePluginsReserve(ctx context.Context, state *fra
metrics.FrameworkExtensionPointDuration.WithLabelValues(metrics.Reserve, status.Code().String(), f.profileName).Observe(metrics.SinceInSeconds(startTime))
}()
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "Reserve")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "Reserve")
logger = klog.LoggerWithValues(logger, "node", klog.ObjectRef{Name: nodeName})
}
for _, pl := range f.reservePlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status = f.runReservePluginReserve(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
if status.IsRejected() {
@ -1319,15 +1346,19 @@ func (f *frameworkImpl) RunReservePluginsUnreserve(ctx context.Context, state *f
// Execute the Unreserve operation of each reserve plugin in the
// *reverse* order in which the Reserve operation was executed.
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "Unreserve")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "Unreserve")
logger = klog.LoggerWithValues(logger, "node", klog.ObjectRef{Name: nodeName})
}
for i := len(f.reservePlugins) - 1; i >= 0; i-- {
logger := klog.LoggerWithName(logger, f.reservePlugins[i].Name())
ctx := klog.NewContext(ctx, logger)
f.runReservePluginUnreserve(ctx, f.reservePlugins[i], state, pod, nodeName)
pl := f.reservePlugins[i]
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
f.runReservePluginUnreserve(ctx, pl, state, pod, nodeName)
}
}
@ -1355,14 +1386,17 @@ func (f *frameworkImpl) RunPermitPlugins(ctx context.Context, state *framework.C
pluginsWaitTime := make(map[string]time.Duration)
statusCode := framework.Success
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "Permit")
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
verboseLogs := logger.V(4).Enabled()
if verboseLogs {
logger = klog.LoggerWithName(logger, "Permit")
logger = klog.LoggerWithValues(logger, "node", klog.ObjectRef{Name: nodeName})
}
for _, pl := range f.permitPlugins {
logger := klog.LoggerWithName(logger, pl.Name())
ctx := klog.NewContext(ctx, logger)
ctx := ctx
if verboseLogs {
logger := klog.LoggerWithName(logger, pl.Name())
ctx = klog.NewContext(ctx, logger)
}
status, timeout := f.runPermitPlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
if status.IsRejected() {

View File

@ -77,6 +77,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
}
pod := podInfo.Pod
// TODO(knelasevero): Remove duplicated keys from log entry calls
// When contextualized logging hits GA
// https://github.com/kubernetes/kubernetes/issues/111672
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod))
ctx = klog.NewContext(ctx, logger)
logger.V(4).Info("About to try and schedule pod", "pod", klog.KObj(pod))
fwk, err := sched.frameworkForPod(pod)