scheduler_perf: track and visualize progress over time

This is useful to see whether pod scheduling happens in bursts and how it
behaves over time, which is relevant in particular for dynamic resource
allocation where it may become harder at the end to find the node which still
has resources available.

Besides "pods scheduled" it's also useful to know how many attempts were
needed, so schedule_attempts_total also gets sampled and stored.

To visualize the result of one or more test runs, use:

     gnuplot.sh *.dat
This commit is contained in:
Patrick Ohly 2023-01-27 19:31:37 +01:00
parent ded96042f7
commit d100768d94
6 changed files with 293 additions and 7 deletions

View File

@ -258,12 +258,8 @@ func GetHistogramVecFromGatherer(gatherer metrics.Gatherer, metricName string, l
if err != nil {
return nil, err
}
for _, mFamily := range m {
if mFamily.GetName() == metricName {
metricFamily = mFamily
break
}
}
metricFamily = findMetricFamily(m, metricName)
if metricFamily == nil {
return nil, fmt.Errorf("metric %q not found", metricName)
@ -433,3 +429,47 @@ func LabelsMatch(metric *dto.Metric, labelFilter map[string]string) bool {
return true
}
// GetCounterValuesFromGatherer collects counters matching the given metric
// name and label filter (lvMap) from a gatherer implementing the
// k8s.io/component-base/metrics.Gatherer interface. It returns all counter
// values that had a label with the given labelName in a map that uses the
// label value as keys.
//
// Used only for testing purposes where we need to gather metrics directly from a running binary (without metrics endpoint).
func GetCounterValuesFromGatherer(gatherer metrics.Gatherer, metricName string, lvMap map[string]string, labelName string) (map[string]float64, error) {
	m, err := gatherer.Gather()
	if err != nil {
		return nil, err
	}

	metricFamily := findMetricFamily(m, metricName)
	if metricFamily == nil {
		return nil, fmt.Errorf("metric %q not found", metricName)
	}
	if len(metricFamily.GetMetric()) == 0 {
		return nil, fmt.Errorf("metric %q is empty", metricName)
	}

	values := make(map[string]float64)
	for _, metric := range metricFamily.GetMetric() {
		// Guard clauses: skip metrics that don't match the label filter
		// or that are not counters.
		if !LabelsMatch(metric, lvMap) {
			continue
		}
		counter := metric.GetCounter()
		if counter == nil {
			continue
		}
		for _, labelPair := range metric.Label {
			if labelPair.GetName() == labelName {
				values[labelPair.GetValue()] = counter.GetValue()
			}
		}
	}
	return values, nil
}
// findMetricFamily returns the first metric family in metricFamilies whose
// name equals metricName, or nil when there is no match.
func findMetricFamily(metricFamilies []*dto.MetricFamily, metricName string) *dto.MetricFamily {
	for i := range metricFamilies {
		if metricFamilies[i].GetName() == metricName {
			return metricFamilies[i]
		}
	}
	return nil
}

View File

@ -20,6 +20,7 @@ import (
"fmt"
"math"
"reflect"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
@ -591,3 +592,104 @@ func TestGetHistogramVecFromGatherer(t *testing.T) {
})
}
}
// TestGetCounterValuesFromGatherer verifies that counter values can be
// retrieved from a local registry, filtered by labels, and keyed by the
// value of a chosen label.
func TestGetCounterValuesFromGatherer(t *testing.T) {
	namespace := "namespace"
	subsystem := "subsystem"
	name := "metric_test_name"
	metricName := fmt.Sprintf("%s_%s_%s", namespace, subsystem, name)
	tests := map[string]struct {
		metricName        string // Empty is replaced with valid name.
		lvMap             map[string]string
		labelName         string
		wantCounterValues map[string]float64
		wantErr           string
	}{
		"wrong-metric": {
			metricName: "no-such-metric",
			wantErr:    `metric "no-such-metric" not found`,
		},
		"none": {
			metricName:        metricName,
			lvMap:             map[string]string{"no-such-label": "a"},
			wantCounterValues: map[string]float64{},
		},
		"value1-0": {
			metricName:        metricName,
			lvMap:             map[string]string{"label1": "value1-0"},
			labelName:         "label2",
			wantCounterValues: map[string]float64{"value2-0": 1.5, "value2-1": 2.5},
		},
		"value1-1": {
			metricName:        metricName,
			lvMap:             map[string]string{"label1": "value1-1"},
			labelName:         "label2",
			wantCounterValues: map[string]float64{"value2-0": 3.5, "value2-1": 4.5},
		},
		"value1-1-value2-0-none": {
			metricName:        metricName,
			lvMap:             map[string]string{"label1": "value1-1", "label2": "value2-0"},
			labelName:         "none",
			wantCounterValues: map[string]float64{},
		},
		"value1-0-value2-0-one": {
			metricName:        metricName,
			lvMap:             map[string]string{"label1": "value1-0", "label2": "value2-0"},
			labelName:         "label2",
			wantCounterValues: map[string]float64{"value2-0": 1.5},
		},
	}

	// Use tcName to avoid shadowing the metric name component above.
	for tcName, tt := range tests {
		t.Run(tcName, func(t *testing.T) {
			// CounterVec has two labels defined.
			labels := []string{"label1", "label2"}
			counterOpts := &metrics.CounterOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      name,
				Help:      "counter help message",
			}
			vec := metrics.NewCounterVec(counterOpts, labels)
			// Use local registry so tests don't interfere with each other.
			var registry = metrics.NewKubeRegistry()
			var gather metrics.Gatherer = registry
			registry.MustRegister(vec)
			// Observe two metrics with same value for label1 but different value of label2.
			vec.WithLabelValues("value1-0", "value2-0").Add(1.5)
			vec.WithLabelValues("value1-0", "value2-1").Add(2.5)
			vec.WithLabelValues("value1-1", "value2-0").Add(3.5)
			vec.WithLabelValues("value1-1", "value2-1").Add(4.5)
			// The check for empty metric apparently cannot be tested: registering
			// a NewCounterVec with no values has the effect that it doesn't get
			// returned, leading to "not found".

			counterValues, err := GetCounterValuesFromGatherer(gather, tt.metricName, tt.lvMap, tt.labelName)
			if tt.wantErr != "" {
				if err == nil {
					t.Fatalf("expected error %q, got none", tt.wantErr)
				}
				if !strings.Contains(err.Error(), tt.wantErr) {
					t.Fatalf("expected error %q, got instead: %v", tt.wantErr, err)
				}
				return
			}
			// An error when none is expected must fail the test instead of
			// being silently ignored.
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}

			if diff := cmp.Diff(tt.wantCounterValues, counterValues); diff != "" {
				t.Errorf("Got unexpected counter values (-want +got):\n%s", diff)
			}
		})
	}
}

View File

@ -175,3 +175,22 @@ the ci-benchmark-scheduler-perf periodic job will fail with an error log such as
This makes it possible to analyze which workload failed. Make sure that the failure is not an outlier
by checking multiple runs of the job. If the failures are not related to any regression,
but to an incorrect threshold setting, it is reasonable to decrease it.
### Visualization
Some support for visualizing progress over time is built into the
benchmarks. The measurement operation which creates pods writes .dat files like
this:
test/integration/scheduler_perf/SchedulingBasic_5000Nodes_2023-03-17T14:52:09Z.dat
This file is in a text format that [gnuplot](http://www.gnuplot.info/) can
read. A wrapper script selects some suitable parameters:
test/integration/scheduler_perf/gnuplot.sh test/integration/scheduler_perf/*.dat
It plots in an interactive window by default. To write into a file, use
test/integration/scheduler_perf/gnuplot.sh \
-e 'set term png; set output "<output>.png"' \
test/integration/scheduler_perf/*.dat

View File

@ -0,0 +1,54 @@
#!/usr/bin/env bash

# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Invoke this script with a list of *.dat and it'll plot them with gnuplot.
# Any non-file parameter is passed through to gnuplot. By default,
# an X11 window is used to display the result. To write into a file,
# use
#    -e "set term png; set output <output>.png"

# Split the command line: anything that exists as a file is treated as a
# data file to plot, everything else is forwarded to gnuplot. The default
# x11 terminal comes first so that user-supplied -e options can override it.
files=()
args=( -e "set term x11 persist" )
for i in "$@"; do
    if [ -f "$i" ]; then
        files+=("$i")
    else
        args+=("$i")
    fi
done

# Generate a gnuplot script on stdout and pipe it into gnuplot; tee echoes
# the generated script to stderr so it can be inspected/debugged.
# NOTE: the heredoc content below is emitted verbatim as gnuplot input,
# so no shell comments may be added inside it.
(
    cat <<EOF
set ytics autofreq nomirror tc lt 1
set xlabel 'measurement runtime [seconds]'
set ylabel 'scheduling rate [pods/second]' tc lt 1
set y2tics autofreq nomirror tc lt 2
set y2label 'scheduling attempts per pod' tc lt 2
# Derivative from https://stackoverflow.com/questions/15751226/how-can-i-plot-the-derivative-of-a-graph-in-gnuplot.
d2(x,y) = (\$0 == 0) ? (x1 = x, y1 = y, 1/0) : (x2 = x1, x1 = x, y2 = y1, y1 = y, (y1-y2)/(x1-x2))
dx = 0.25
EOF
    # One "plot" command with three curves per data file: metric-based rate,
    # observed rate (both on the left y axis) and attempts per pod (right y axis).
    echo -n "plot "
    for file in "${files[@]}"; do
        echo -n "'${file}' using (\$1 - dx):(d2(\$1, \$2)) with linespoints title '$(basename "$file" .dat | sed -e 's/_/ /g') metric rate' axis x1y1, "
        echo -n "'${file}' using (\$1 - dx):(d2(\$1, \$4)) with linespoints title '$(basename "$file" .dat | sed -e 's/_/ /g') observed rate' axis x1y1, "
        echo -n "'${file}' using 1:(\$3/\$2) with linespoints title '$(basename "$file" .dat | sed -e 's/_/ /g') attempts' axis x1y2, "
    done
    echo
) | tee /dev/stderr | gnuplot "${args[@]}" -

View File

@ -1095,6 +1095,30 @@ func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkr
// Reset metrics to prevent metrics generated in current workload gets
// carried over to the next workload.
legacyregistry.Reset()
// Exactly one result is expected to contain the progress information.
for _, item := range results {
if len(item.progress) == 0 {
continue
}
destFile, err := dataFilename(strings.ReplaceAll(fmt.Sprintf("%s_%s_%s.dat", tc.Name, w.Name, runID), "/", "_"))
if err != nil {
b.Fatalf("prepare data file: %v", err)
}
f, err := os.Create(destFile)
if err != nil {
b.Fatalf("create data file: %v", err)
}
// Print progress over time.
for _, sample := range item.progress {
fmt.Fprintf(f, "%.1fs %d %d %d %f\n", sample.ts.Sub(item.start).Seconds(), sample.completed, sample.attempts, sample.observedTotal, sample.observedRate)
}
if err := f.Close(); err != nil {
b.Fatalf("closing data file: %v", err)
}
}
})
}
})

View File

@ -63,6 +63,8 @@ const (
var dataItemsDir = flag.String("data-items-dir", "", "destination directory for storing generated data items for perf dashboard")
var runID = time.Now().Format(dateFormat)
func newDefaultComponentConfig() (*config.KubeSchedulerConfiguration, error) {
gvk := kubeschedulerconfigv1.SchemeGroupVersion.WithKind("KubeSchedulerConfiguration")
cfg := config.KubeSchedulerConfiguration{}
@ -180,6 +182,10 @@ type DataItem struct {
Unit string `json:"unit"`
// Labels is the labels of the data item.
Labels map[string]string `json:"labels,omitempty"`
// progress contains number of scheduled pods over time.
progress []podScheduling
start time.Time
}
// DataItems is the data point set. It is the struct that perf dashboard expects.
@ -188,6 +194,14 @@ type DataItems struct {
DataItems []DataItem `json:"dataItems"`
}
// podScheduling is one sample of pod scheduling progress over time.
type podScheduling struct {
	// ts is the time at which the sample was taken.
	ts time.Time
	// attempts is the cumulative number of scheduling attempts, i.e. the
	// sum of the "unschedulable", "error" and "scheduled" counters of the
	// scheduler_schedule_attempts_total metric.
	attempts int
	// completed is the cumulative number of successfully scheduled pods
	// according to the "scheduled" counter of that same metric.
	completed int
	// observedTotal is the number of scheduled pods as observed directly
	// by the collector.
	observedTotal int
	// observedRate is the scheduling throughput computed by the collector
	// for the current sampling interval.
	observedRate float64
}
// makeBasePod creates a Pod object to be used as a template.
func makeBasePod() *v1.Pod {
basePod := &v1.Pod{
@ -241,6 +255,17 @@ func dataItems2JSONFile(dataItems DataItems, namePrefix string) error {
return os.WriteFile(destFile, formatted.Bytes(), 0644)
}
// dataFilename returns the file name to use for storing destFile. When the
// -data-items-dir flag is set, the file is placed inside that directory,
// which gets created if needed; otherwise destFile is returned unchanged.
func dataFilename(destFile string) (string, error) {
	if *dataItemsDir == "" {
		return destFile, nil
	}
	// Ensure the "dataItemsDir" path is valid.
	if err := os.MkdirAll(*dataItemsDir, 0750); err != nil {
		return "", fmt.Errorf("dataItemsDir path %v does not exist and cannot be created: %w", *dataItemsDir, err)
	}
	return path.Join(*dataItemsDir, destFile), nil
}
type labelValues struct {
label string
values []string
@ -383,6 +408,9 @@ type throughputCollector struct {
labels map[string]string
namespaces sets.Set[string]
errorMargin float64
progress []podScheduling
start time.Time
}
func newThroughputCollector(podInformer coreinformers.PodInformer, labels map[string]string, namespaces []string, errorMargin float64) *throughputCollector {
@ -492,6 +520,7 @@ func (tc *throughputCollector) run(tCtx ktesting.TContext) {
// sampling and creating pods get started independently.
lastScheduledCount = scheduled
lastSampleTime = now
tc.start = now
continue
}
@ -524,6 +553,20 @@ func (tc *throughputCollector) run(tCtx ktesting.TContext) {
for i := 0; i <= skipped; i++ {
tc.schedulingThroughputs = append(tc.schedulingThroughputs, throughput)
}
// Record the metric sample.
counters, err := testutil.GetCounterValuesFromGatherer(legacyregistry.DefaultGatherer, "scheduler_schedule_attempts_total", map[string]string{"profile": "default-scheduler"}, "result")
if err != nil {
klog.Error(err)
}
tc.progress = append(tc.progress, podScheduling{
ts: now,
attempts: int(counters["unschedulable"] + counters["error"] + counters["scheduled"]),
completed: int(counters["scheduled"]),
observedTotal: scheduled,
observedRate: throughput,
})
lastScheduledCount = scheduled
klog.Infof("%d pods have been scheduled successfully", lastScheduledCount)
skipped = 0
@ -533,7 +576,11 @@ func (tc *throughputCollector) run(tCtx ktesting.TContext) {
}
func (tc *throughputCollector) collect() []DataItem {
throughputSummary := DataItem{Labels: tc.labels}
throughputSummary := DataItem{
Labels: tc.labels,
progress: tc.progress,
start: tc.start,
}
if length := len(tc.schedulingThroughputs); length > 0 {
sort.Float64s(tc.schedulingThroughputs)
sum := 0.0