Merge pull request #118292 from cchapla/crd_webhook_metrics

Webhook conversion metrics [request/error counts and latency metrics]
This commit is contained in:
Kubernetes Prow Robot 2023-06-06 18:00:12 -07:00 committed by GitHub
commit 9ede836ea2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 349 additions and 1 deletions

View File

@ -17,6 +17,7 @@ limitations under the License.
package conversion
import (
"context"
"strconv"
"sync"
"time"
@ -86,3 +87,72 @@ func (m *converterMetric) Convert(in runtime.Object, targetGV schema.GroupVersio
}
return obj, err
}
// WebhookConversionErrorType names a category of webhook conversion failure.
// ObserveWebhookConversionFailure converts it with string() and uses the
// result as the value of the "failure_type" metric label.
type WebhookConversionErrorType string
// Failure categories reported by webhookConverter.Convert.
const (
// Reported when reading the webhook call result into the response returns an error.
WebhookConversionCallFailure WebhookConversionErrorType = "webhook_conversion_call_failure"
// Reported when getConvertedObjectsFromResponse returns an error.
WebhookConversionMalformedResponseFailure WebhookConversionErrorType = "webhook_conversion_malformed_response_failure"
// Reported when the number of converted objects differs from the number sent for conversion.
WebhookConversionPartialResponseFailure WebhookConversionErrorType = "webhook_conversion_partial_response_failure"
// Reported when a returned object fails decoding, group/version, kind, type,
// validation, or metadata-restoration checks.
WebhookConversionInvalidConvertedObjectFailure WebhookConversionErrorType = "webhook_conversion_invalid_converted_object_failure"
// Reported on the single-object path when the response does not hold exactly one object.
WebhookConversionNoObjectsReturnedFailure WebhookConversionErrorType = "webhook_conversion_no_objects_returned_failure"
)
var (
// Metrics is the package-level WebhookConversionMetrics instance; it is
// created by newWebhookConversionMetrics when the package is initialized.
Metrics = newWebhookConversionMetrics()
// namespace is passed as the Namespace option of both webhook conversion metrics.
namespace = "apiserver"
)
// WebhookConversionMetrics instruments webhook conversion with prometheus metrics.
// Both collectors carry the labels "result" and "failure_type".
type WebhookConversionMetrics struct {
// webhookConversionRequest counts conversion requests; it is incremented once
// per ObserveWebhookConversionSuccess / ObserveWebhookConversionFailure call.
webhookConversionRequest *metrics.CounterVec
// webhookConversionLatency records the elapsed time of each conversion in seconds.
webhookConversionLatency *metrics.HistogramVec
}
// newWebhookConversionMetrics creates the request counter and the latency
// histogram for webhook conversion, registers both with the legacy registry,
// and returns them wrapped in a WebhookConversionMetrics.
//
// Registration uses MustRegister, so a registration error panics.
func newWebhookConversionMetrics() *WebhookConversionMetrics {
	m := &WebhookConversionMetrics{
		webhookConversionRequest: metrics.NewCounterVec(
			&metrics.CounterOpts{
				Name:           "webhook_conversion_request_total",
				Namespace:      namespace,
				Help:           "Counter for webhook conversion requests with success/failure and failure error type",
				StabilityLevel: metrics.ALPHA,
			},
			[]string{"result", "failure_type"},
		),
		webhookConversionLatency: metrics.NewHistogramVec(
			&metrics.HistogramOpts{
				Name:      "webhook_conversion_duration_seconds",
				Namespace: namespace,
				Help:      "Webhook conversion request latency",
				// Various buckets from 5 ms to 60 seconds
				Buckets:        []float64{0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 30, 45, 60},
				StabilityLevel: metrics.ALPHA,
			},
			[]string{"result", "failure_type"},
		),
	}

	// Register in the same order the collectors were created: counter first.
	legacyregistry.MustRegister(m.webhookConversionRequest)
	legacyregistry.MustRegister(m.webhookConversionLatency)
	return m
}
// ObserveWebhookConversionSuccess records one successful webhook conversion:
// it increments the request counter and observes elapsed in the latency
// histogram, both with result="success" and an empty failure_type.
func (m *WebhookConversionMetrics) ObserveWebhookConversionSuccess(ctx context.Context, elapsed time.Duration) {
	const outcome = "success"
	requests := m.webhookConversionRequest.WithContext(ctx).WithLabelValues(outcome, "")
	requests.Inc()
	m.observe(ctx, elapsed, outcome, "")
}
// ObserveWebhookConversionFailure records one failed webhook conversion: it
// increments the request counter and observes elapsed in the latency
// histogram, both with result="failure" and failure_type set to errorType.
func (m *WebhookConversionMetrics) ObserveWebhookConversionFailure(ctx context.Context, elapsed time.Duration, errorType WebhookConversionErrorType) {
	const outcome = "failure"
	failureType := string(errorType)
	m.webhookConversionRequest.WithContext(ctx).WithLabelValues(outcome, failureType).Inc()
	m.observe(ctx, elapsed, outcome, errorType)
}
// observe adds elapsed, expressed in seconds, to the latency histogram series
// selected by result and errorType.
func (m *WebhookConversionMetrics) observe(ctx context.Context, elapsed time.Duration, result string, errorType WebhookConversionErrorType) {
	series := m.webhookConversionLatency.WithContext(ctx).WithLabelValues(result, string(errorType))
	series.Observe(elapsed.Seconds())
}

View File

@ -0,0 +1,197 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package conversion
import (
"context"
"fmt"
"testing"
"time"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/testutil"
)
// TestWebhookConversionMetrics_ObserveWebhookConversionSuccess records two
// successful conversions and checks the request counter and the latency
// histogram sample count after each one.
//
// Both cases write to the collectors held by the package-level Metrics, so the
// expected values are cumulative and the cases depend on running in order.
func TestWebhookConversionMetrics_ObserveWebhookConversionSuccess(t *testing.T) {
	type collectors struct {
		requests *metrics.CounterVec
		latency  *metrics.HistogramVec
	}
	shared := collectors{
		requests: Metrics.webhookConversionRequest,
		latency:  Metrics.webhookConversionLatency,
	}

	cases := []struct {
		name       string
		collectors collectors
		elapsed    time.Duration
		wantLabels map[string]string
		wantCount  int
	}{
		{
			name:       "test_conversion_success",
			collectors: shared,
			elapsed:    2 * time.Second,
			wantLabels: map[string]string{
				"result":       "success",
				"failure_type": "",
			},
			wantCount: 1,
		},
		{
			name:       "test_conversion_success_2",
			collectors: shared,
			elapsed:    2 * time.Second,
			wantLabels: map[string]string{
				"result":       "success",
				"failure_type": "",
			},
			wantCount: 2,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			recorder := &WebhookConversionMetrics{
				webhookConversionRequest: tc.collectors.requests,
				webhookConversionLatency: tc.collectors.latency,
			}
			recorder.ObserveWebhookConversionSuccess(context.TODO(), tc.elapsed)

			requestMetric := fmt.Sprintf("%s_webhook_conversion_request_total", namespace)
			latencyMetric := fmt.Sprintf("%s_webhook_conversion_duration_seconds", namespace)
			testutil.AssertVectorCount(t, requestMetric, tc.wantLabels, tc.wantCount)
			testutil.AssertHistogramTotalCount(t, latencyMetric, tc.wantLabels, tc.wantCount)
		})
	}
}
// TestWebhookConversionMetrics_ObserveWebhookConversionFailure records one
// failed conversion per failure type and checks the request counter and the
// latency histogram sample count for that failure type's label set.
//
// Both assertions filter by wantLabels, and every case uses a distinct
// failure_type, so each series is expected to hold exactly one sample. The
// histogram is therefore checked against expectedRequestValue as well; no
// running total across failure types is asserted.
func TestWebhookConversionMetrics_ObserveWebhookConversionFailure(t *testing.T) {
	type fields struct {
		webhookConversionRequest *metrics.CounterVec
		webhookConversionLatency *metrics.HistogramVec
	}
	type args struct {
		elapsed   time.Duration
		errorType WebhookConversionErrorType
	}
	tests := []struct {
		name                 string
		fields               fields
		args                 args
		wantLabels           map[string]string
		expectedRequestValue int
	}{
		{
			name: "test_conversion_failure",
			fields: fields{
				webhookConversionRequest: Metrics.webhookConversionRequest,
				webhookConversionLatency: Metrics.webhookConversionLatency,
			},
			args: args{
				elapsed:   2 * time.Second,
				errorType: WebhookConversionCallFailure,
			},
			wantLabels: map[string]string{
				"result":       "failure",
				"failure_type": string(WebhookConversionCallFailure),
			},
			expectedRequestValue: 1,
		}, {
			name: "test_conversion_failure_2",
			fields: fields{
				webhookConversionRequest: Metrics.webhookConversionRequest,
				webhookConversionLatency: Metrics.webhookConversionLatency,
			},
			args: args{
				elapsed:   2 * time.Second,
				errorType: WebhookConversionMalformedResponseFailure,
			},
			wantLabels: map[string]string{
				"result":       "failure",
				"failure_type": string(WebhookConversionMalformedResponseFailure),
			},
			expectedRequestValue: 1,
		}, {
			name: "test_conversion_failure_3",
			fields: fields{
				webhookConversionRequest: Metrics.webhookConversionRequest,
				webhookConversionLatency: Metrics.webhookConversionLatency,
			},
			args: args{
				elapsed:   2 * time.Second,
				errorType: WebhookConversionPartialResponseFailure,
			},
			wantLabels: map[string]string{
				"result":       "failure",
				"failure_type": string(WebhookConversionPartialResponseFailure),
			},
			expectedRequestValue: 1,
		}, {
			name: "test_conversion_failure_4",
			fields: fields{
				webhookConversionRequest: Metrics.webhookConversionRequest,
				webhookConversionLatency: Metrics.webhookConversionLatency,
			},
			args: args{
				elapsed:   2 * time.Second,
				errorType: WebhookConversionInvalidConvertedObjectFailure,
			},
			wantLabels: map[string]string{
				"result":       "failure",
				"failure_type": string(WebhookConversionInvalidConvertedObjectFailure),
			},
			expectedRequestValue: 1,
		}, {
			name: "test_conversion_failure_5",
			fields: fields{
				webhookConversionRequest: Metrics.webhookConversionRequest,
				webhookConversionLatency: Metrics.webhookConversionLatency,
			},
			args: args{
				elapsed:   2 * time.Second,
				errorType: WebhookConversionNoObjectsReturnedFailure,
			},
			wantLabels: map[string]string{
				"result":       "failure",
				"failure_type": string(WebhookConversionNoObjectsReturnedFailure),
			},
			expectedRequestValue: 1,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			m := &WebhookConversionMetrics{
				webhookConversionRequest: tt.fields.webhookConversionRequest,
				webhookConversionLatency: tt.fields.webhookConversionLatency,
			}
			m.ObserveWebhookConversionFailure(context.TODO(), tt.args.elapsed, tt.args.errorType)
			testutil.AssertVectorCount(t, fmt.Sprintf("%s_webhook_conversion_request_total", namespace), tt.wantLabels, tt.expectedRequestValue)
			// Per-series sample count: one observation per failure type.
			testutil.AssertHistogramTotalCount(t, fmt.Sprintf("%s_webhook_conversion_duration_seconds", namespace), tt.wantLabels, tt.expectedRequestValue)
		})
	}
}

View File

@ -237,7 +237,7 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion)
if isEmptyUnstructuredObject(in) {
return c.nopConverter.Convert(in, toGV)
}
t := time.Now()
listObj, isList := in.(*unstructured.UnstructuredList)
requestUID := uuid.NewUUID()
@ -250,6 +250,7 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion)
objCount := len(objectsToConvert)
if objCount == 0 {
Metrics.ObserveWebhookConversionSuccess(ctx, time.Since(t))
// no objects needed conversion
if !isList {
// for a single item, return as-is
@ -275,16 +276,19 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion)
r := c.restClient.Post().Body(request).Do(ctx)
if err := r.Into(response); err != nil {
// TODO: Return a webhook specific error to be able to convert it to meta.Status
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionCallFailure)
return nil, fmt.Errorf("conversion webhook for %v failed: %v", in.GetObjectKind().GroupVersionKind(), err)
}
span.AddEvent("Request completed")
convertedObjects, err := getConvertedObjectsFromResponse(requestUID, response)
if err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionMalformedResponseFailure)
return nil, fmt.Errorf("conversion webhook for %v failed: %v", in.GetObjectKind().GroupVersionKind(), err)
}
if len(convertedObjects) != len(objectsToConvert) {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionPartialResponseFailure)
return nil, fmt.Errorf("conversion webhook for %v returned %d objects, expected %d", in.GetObjectKind().GroupVersionKind(), len(convertedObjects), len(objectsToConvert))
}
@ -302,62 +306,78 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion)
}
converted, err := getRawExtensionObject(convertedObjects[convertedIndex])
if err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: %v", in.GetObjectKind().GroupVersionKind(), convertedIndex, err)
}
convertedIndex++
if expected, got := toGV, converted.GetObjectKind().GroupVersionKind().GroupVersion(); expected != got {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: invalid groupVersion (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), convertedIndex, expected, got)
}
if expected, got := original.GetObjectKind().GroupVersionKind().Kind, converted.GetObjectKind().GroupVersionKind().Kind; expected != got {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: invalid kind (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), convertedIndex, expected, got)
}
unstructConverted, ok := converted.(*unstructured.Unstructured)
if !ok {
// this should not happen
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: invalid type, expected=Unstructured, got=%T", in.GetObjectKind().GroupVersionKind(), convertedIndex, converted)
}
if err := validateConvertedObject(original, unstructConverted); err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: %v", in.GetObjectKind().GroupVersionKind(), convertedIndex, err)
}
if err := restoreObjectMeta(original, unstructConverted); err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid metadata in object at index %v: %v", in.GetObjectKind().GroupVersionKind(), convertedIndex, err)
}
convertedList.Items[i] = *unstructConverted
}
convertedList.SetAPIVersion(toGV.String())
Metrics.ObserveWebhookConversionSuccess(ctx, time.Since(t))
return convertedList, nil
}
if len(convertedObjects) != 1 {
// This should not happen
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionNoObjectsReturnedFailure)
return nil, fmt.Errorf("conversion webhook for %v failed, no objects returned", in.GetObjectKind())
}
converted, err := getRawExtensionObject(convertedObjects[0])
if err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, err
}
if e, a := toGV, converted.GetObjectKind().GroupVersionKind().GroupVersion(); e != a {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid object at index 0: invalid groupVersion (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), e, a)
}
if e, a := in.GetObjectKind().GroupVersionKind().Kind, converted.GetObjectKind().GroupVersionKind().Kind; e != a {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid object at index 0: invalid kind (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), e, a)
}
unstructConverted, ok := converted.(*unstructured.Unstructured)
if !ok {
// this should not happen
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v failed, unexpected type %T at index 0", in.GetObjectKind().GroupVersionKind(), converted)
}
unstructIn, ok := in.(*unstructured.Unstructured)
if !ok {
// this should not happen
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v failed unexpected input type %T", in.GetObjectKind().GroupVersionKind(), in)
}
if err := validateConvertedObject(unstructIn, unstructConverted); err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid object: %v", in.GetObjectKind().GroupVersionKind(), err)
}
if err := restoreObjectMeta(unstructIn, unstructConverted); err != nil {
Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure)
return nil, fmt.Errorf("conversion webhook for %v returned invalid metadata: %v", in.GetObjectKind().GroupVersionKind(), err)
}
Metrics.ObserveWebhookConversionSuccess(ctx, time.Since(t))
return converted, nil
}

View File

@ -19,11 +19,13 @@ package testutil
import (
"fmt"
"io"
"testing"
"github.com/prometheus/client_golang/prometheus/testutil"
apimachineryversion "k8s.io/apimachinery/pkg/version"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
// CollectAndCompare registers the provided Collector with a newly created
@ -91,3 +93,62 @@ func NewFakeKubeRegistry(ver string) metrics.KubeRegistry {
return metrics.NewKubeRegistry()
}
// AssertVectorCount gathers metrics from legacyregistry.DefaultGatherer and
// checks that the counter values of the metric family called name, summed over
// every series accepted by LabelsMatch for labelFilter, equal wantCount.
//
// Each counter value is truncated to int before it is added to the sum. A
// gather error aborts the test with t.Fatalf. A mismatch is reported with
// t.Errorf, followed by one log line per series of the named family to help
// diagnose which label sets were actually present.
func AssertVectorCount(t *testing.T, name string, labelFilter map[string]string, wantCount int) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	// Named families (not metrics) to avoid shadowing the imported metrics package.
	families, err := legacyregistry.DefaultGatherer.Gather()
	if err != nil {
		t.Fatalf("Failed to gather metrics: %s", err)
	}

	counterSum := 0
	for _, mf := range families {
		if mf.GetName() != name {
			continue // Ignore other metrics.
		}
		for _, metric := range mf.GetMetric() {
			if !LabelsMatch(metric, labelFilter) {
				continue
			}
			counterSum += int(metric.GetCounter().GetValue())
		}
	}
	if wantCount == counterSum {
		return
	}

	t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, counterSum, name, labelFilter)
	for _, mf := range families {
		if mf.GetName() != name {
			continue
		}
		for _, metric := range mf.GetMetric() {
			t.Logf("\tnear match: %s", metric.String())
		}
	}
}
// AssertHistogramTotalCount gathers metrics from legacyregistry.DefaultGatherer
// and checks that the histogram sample counts of the metric family called
// name, summed over every series accepted by LabelsMatch for labelFilter,
// equal wantCount.
//
// A gather error aborts the test with t.Fatalf. A mismatch is reported with
// t.Errorf, followed by one log line per series of the named family to help
// diagnose which label sets were actually present.
func AssertHistogramTotalCount(t *testing.T, name string, labelFilter map[string]string, wantCount int) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	// Named families (not metrics) to avoid shadowing the imported metrics package.
	families, err := legacyregistry.DefaultGatherer.Gather()
	if err != nil {
		t.Fatalf("Failed to gather metrics: %s", err)
	}

	sampleSum := 0
	for _, mf := range families {
		if mf.GetName() != name {
			continue // Ignore other metrics.
		}
		for _, metric := range mf.GetMetric() {
			if !LabelsMatch(metric, labelFilter) {
				continue
			}
			sampleSum += int(metric.GetHistogram().GetSampleCount())
		}
	}
	if wantCount == sampleSum {
		return
	}

	t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, sampleSum, name, labelFilter)
	for _, mf := range families {
		if mf.GetName() != name {
			continue
		}
		for _, metric := range mf.GetMetric() {
			// No trailing newline needed: Logf appends one when it is missing.
			t.Logf("\tnear match: %s", metric.String())
		}
	}
}