enable lint in metrics test utils and add legacy invalid metrics to exception list.

This commit is contained in:
RainbowMango 2020-06-12 16:54:03 +08:00
parent b8f24173da
commit 1a26b3eeb5
3 changed files with 126 additions and 6 deletions

View File

@ -17,7 +17,9 @@ limitations under the License.
package testutil
import (
"fmt"
"io"
"strings"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
)
@ -28,14 +30,26 @@ import (
// We setup this list for allow and not fail on the current violations.
// Generally speaking, you need to fix the problem for a new metric rather than add it into the list.
var exceptionMetrics = []string{
// k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/server/egressselector
"apiserver_egress_dialer_dial_failure_count", // counter metrics should have "_total" suffix
// k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/queueset
"apiserver_flowcontrol_current_inqueue_requests", // label names should be written in 'snake_case' not 'camelCase',
"apiserver_flowcontrol_current_executing_requests", // label names should be written in 'snake_case' not 'camelCase'
"apiserver_flowcontrol_rejected_requests_total", // label names should be written in 'snake_case' not 'camelCase'
// k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/server/healthz
"apiserver_request_total", // label names should be written in 'snake_case' not 'camelCase'
// k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/endpoints/filters
"authenticated_user_requests", // counter metrics should have "_total" suffix
"authentication_attempts", // counter metrics should have "_total" suffix
// kube-apiserver
"aggregator_openapi_v2_regeneration_count",
"apiserver_admission_step_admission_duration_seconds_summary",
"apiserver_current_inflight_requests",
"apiserver_longrunning_gauge",
"apiserver_request_total",
"authenticated_user_requests",
"authentication_attempts",
"get_token_count",
"get_token_fail_count",
"ssh_tunnel_open_count",
@ -49,14 +63,22 @@ var exceptionMetrics = []string{
"get_token_fail_count",
"node_collector_evictions_number",
// kubelet-resource-v1alpha1
"container_cpu_usage_seconds_total",
"node_cpu_usage_seconds_total",
// k8s.io/kubernetes/pkg/kubelet/server/stats
"container_cpu_usage_seconds_total", // non-counter metrics should not have "_total" suffix
"node_cpu_usage_seconds_total", // non-counter metrics should not have "_total" suffix
// k8s.io/kubernetes/pkg/kubelet/pleg
"kubelet_running_container_count", // non-histogram and non-summary metrics should not have "_count" suffix
"kubelet_running_pod_count", // non-histogram and non-summary metrics should not have "_count" suffix
}
// A Problem is an issue detected by a Linter.
type Problem promlint.Problem
func (p *Problem) String() string {
return fmt.Sprintf("%s:%s", p.Metric, p.Text)
}
// A Linter is a Prometheus metrics linter. It identifies issues with metric
// names, types, and metadata, and reports them to the caller.
type Linter struct {
@ -101,3 +123,42 @@ func NewPromLinter(r io.Reader) *Linter {
promLinter: promlint.New(r),
}
}
func mergeProblems(problems []Problem) string {
var problemsMsg []string
for index := range problems {
problemsMsg = append(problemsMsg, problems[index].String())
}
return strings.Join(problemsMsg, ",")
}
// shouldIgnore returns true if metric in the exception list, otherwise returns false.
func shouldIgnore(metricName string) bool {
for i := range exceptionMetrics {
if metricName == exceptionMetrics[i] {
return true
}
}
return false
}
// getLintError will ignore the metrics in exception list and converts lint problem to error.
func getLintError(problems []promlint.Problem) error {
var filteredProblems []Problem
for _, problem := range problems {
if shouldIgnore(problem.Metric) {
continue
}
filteredProblems = append(filteredProblems, Problem(problem))
}
if len(filteredProblems) == 0 {
return nil
}
return fmt.Errorf("lint error: %s", mergeProblems(filteredProblems))
}

View File

@ -59,3 +59,46 @@ func TestLinter(t *testing.T) {
})
}
}
func TestMergeProblems(t *testing.T) {
problemOne := Problem{
Metric: "metric_one",
Text: "problem one",
}
problemTwo := Problem{
Metric: "metric_two",
Text: "problem two",
}
var tests = []struct {
name string
problems []Problem
expected string
}{
{
name: "no problem",
problems: nil,
expected: "",
},
{
name: "one problem",
problems: []Problem{problemOne},
expected: "metric_one:problem one",
},
{
name: "more than one problem",
problems: []Problem{problemOne, problemTwo},
expected: "metric_one:problem one,metric_two:problem two",
},
}
for _, test := range tests {
tc := test
t.Run(tc.name, func(t *testing.T) {
got := mergeProblems(tc.problems)
if tc.expected != got {
t.Errorf("expected: %s, but got: %s", tc.expected, got)
}
})
}
}

View File

@ -30,6 +30,14 @@ import (
// pedantic Registry. It then does the same as GatherAndCompare, gathering the
// metrics from the pedantic Registry.
func CollectAndCompare(c metrics.Collector, expected io.Reader, metricNames ...string) error {
lintProblems, err := testutil.CollectAndLint(c, metricNames...)
if err != nil {
return err
}
if err := getLintError(lintProblems); err != nil {
return err
}
return testutil.CollectAndCompare(c, expected, metricNames...)
}
@ -38,6 +46,14 @@ func CollectAndCompare(c metrics.Collector, expected io.Reader, metricNames ...s
// exposition format. If any metricNames are provided, only metrics with those
// names are compared.
func GatherAndCompare(g metrics.Gatherer, expected io.Reader, metricNames ...string) error {
lintProblems, err := testutil.GatherAndLint(g, metricNames...)
if err != nil {
return err
}
if err := getLintError(lintProblems); err != nil {
return err
}
return testutil.GatherAndCompare(g, expected, metricNames...)
}