From 8df1a5e6dcb06285309c5e6d4f2174b60f782637 Mon Sep 17 00:00:00 2001 From: Chinmay Chapla Date: Fri, 26 May 2023 19:34:36 +0000 Subject: [PATCH] Webhook conversion metrics --- .../pkg/apiserver/conversion/metrics.go | 66 +++++ .../pkg/apiserver/conversion/metrics_test.go | 268 ++++++++++++++++++ .../apiserver/conversion/webhook_converter.go | 22 +- 3 files changed, 355 insertions(+), 1 deletion(-) create mode 100644 staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics_test.go diff --git a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics.go b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics.go index 744046b5b47..90a533acad6 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics.go +++ b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics.go @@ -17,6 +17,7 @@ limitations under the License. package conversion import ( + "context" "strconv" "sync" "time" @@ -86,3 +87,68 @@ func (m *converterMetric) Convert(in runtime.Object, targetGV schema.GroupVersio } return obj, err } + +type WebhookConversionErrorType string + +const ( + WebhookConversionCallFailure WebhookConversionErrorType = "webhook_conversion_call_failure" + WebhookConversionMalformedResponseFailure WebhookConversionErrorType = "webhook_conversion_malformed_response_failure" + WebhookConversionPartialResponseFailure WebhookConversionErrorType = "webhook_conversion_partial_response_failure" + WebhookConversionInvalidConvertedObjectFailure WebhookConversionErrorType = "webhook_conversion_invalid_converted_object_failure" + WebhookConversionNoObjectsReturnedFailure WebhookConversionErrorType = "webhook_conversion_no_objects_returned_failure" +) + +var ( + Metrics = newWebhookConversionMetrics() +) + +// WebhookConversionMetrics instruments webhook conversion with prometheus metrics. +type WebhookConversionMetrics struct { + webhookConversionRequest *metrics.CounterVec + webhookConversionLatency *metrics.HistogramVec +} + +func newWebhookConversionMetrics() *WebhookConversionMetrics { + webhookConversionRequest := metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "webhook_conversion_requests", + Help: "Counter for webhook conversion requests with success/failure and failure error type", + StabilityLevel: metrics.ALPHA, + }, + []string{"result", "failure_type"}) + + webhookConversionLatency := metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Name: "webhook_conversion_duration_seconds", + Help: "Webhook conversion request latency", + Buckets: metrics.ExponentialBuckets(0.001, 2, 15), + StabilityLevel: metrics.ALPHA, + }, + []string{"result", "failure_type"}, + ) + + legacyregistry.MustRegister(webhookConversionRequest) + legacyregistry.MustRegister(webhookConversionLatency) + + return &WebhookConversionMetrics{webhookConversionRequest: webhookConversionRequest, webhookConversionLatency: webhookConversionLatency} +} + +// Observe successful request +func (m *WebhookConversionMetrics) ObserveWebhookConversionSuccess(ctx context.Context, elapsed time.Duration) { + result := "success" + m.webhookConversionRequest.WithContext(ctx).WithLabelValues(result, "").Inc() + m.observe(ctx, elapsed, result, "") +} + +// Observe failure with failure type +func (m *WebhookConversionMetrics) ObserveWebhookConversionFailure(ctx context.Context, elapsed time.Duration, errorType WebhookConversionErrorType) { + result := "failure" + m.webhookConversionRequest.WithContext(ctx).WithLabelValues(result, string(errorType)).Inc() + m.observe(ctx, elapsed, result, errorType) +} + +// Observe latency +func (m *WebhookConversionMetrics) observe(ctx context.Context, elapsed time.Duration, result string, errorType WebhookConversionErrorType) { + elapsedSeconds := elapsed.Seconds() + m.webhookConversionLatency.WithContext(ctx).WithLabelValues(result, string(errorType)).Observe(elapsedSeconds) +} diff --git a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics_test.go b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics_test.go new file mode 100644 index 00000000000..36db1ac9c84 --- /dev/null +++ b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/metrics_test.go @@ -0,0 +1,268 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package conversion + +import ( + "context" + "testing" + "time" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" +) + +func TestWebhookConversionMetrics_ObserveWebhookConversionSuccess(t *testing.T) { + type fields struct { + webhookConversionRequest *metrics.CounterVec + webhookConversionLatency *metrics.HistogramVec + } + type args struct { + ctx context.Context + elapsed time.Duration + } + tests := []struct { + name string + fields fields + args args + wantLabels map[string]string + expectedRequestValue int + }{ + // TODO: Add test cases. + { + name: "test_conversion_success", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + }, + wantLabels: map[string]string{ + "result": "success", + "failure_type": "", + }, + expectedRequestValue: 1, + }, { + name: "test_conversion_success_2", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + }, + wantLabels: map[string]string{ + "result": "success", + "failure_type": "", + }, + expectedRequestValue: 2, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := &WebhookConversionMetrics{ + webhookConversionRequest: tt.fields.webhookConversionRequest, + webhookConversionLatency: tt.fields.webhookConversionLatency, + } + m.ObserveWebhookConversionSuccess(tt.args.ctx, tt.args.elapsed) + expectCounterValue(t, "webhook_conversion_requests", tt.wantLabels, tt.expectedRequestValue) + expectHistogramCountTotal(t, "webhook_conversion_duration_seconds", tt.wantLabels, tt.expectedRequestValue) + }) + } +} + +func TestWebhookConversionMetrics_ObserveWebhookConversionFailure(t *testing.T) { + type fields struct { + webhookConversionRequest *metrics.CounterVec + webhookConversionLatency *metrics.HistogramVec + } + type args struct { + ctx context.Context + elapsed time.Duration + errorType WebhookConversionErrorType + } + tests := []struct { + name string + fields fields + args args + wantLabels map[string]string + expectedRequestValue int + expectedLatencyCount int + }{ + // TODO: Add test cases. + { + name: "test_conversion_failure", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + errorType: WebhookConversionCallFailure, + }, + wantLabels: map[string]string{ + "result": "failure", + "failure_type": string(WebhookConversionCallFailure), + }, + expectedRequestValue: 1, + expectedLatencyCount: 1, + }, { + name: "test_conversion_failure_2", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + errorType: WebhookConversionMalformedResponseFailure, + }, + wantLabels: map[string]string{ + "result": "failure", + "failure_type": string(WebhookConversionMalformedResponseFailure), + }, + expectedRequestValue: 1, + expectedLatencyCount: 2, + }, { + name: "test_conversion_failure_3", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + errorType: WebhookConversionPartialResponseFailure, + }, + wantLabels: map[string]string{ + "result": "failure", + "failure_type": string(WebhookConversionPartialResponseFailure), + }, + expectedRequestValue: 1, + expectedLatencyCount: 3, + }, { + name: "test_conversion_failure_4", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + errorType: WebhookConversionInvalidConvertedObjectFailure, + }, + wantLabels: map[string]string{ + "result": "failure", + "failure_type": string(WebhookConversionInvalidConvertedObjectFailure), + }, + expectedRequestValue: 1, + expectedLatencyCount: 4, + }, { + name: "test_conversion_failure_5", + fields: fields{ + webhookConversionRequest: Metrics.webhookConversionRequest, + webhookConversionLatency: Metrics.webhookConversionLatency, + }, + args: args{ + ctx: context.TODO(), + elapsed: 2 * time.Second, + errorType: WebhookConversionNoObjectsReturnedFailure, + }, + wantLabels: map[string]string{ + "result": "failure", + "failure_type": string(WebhookConversionNoObjectsReturnedFailure), + }, + expectedRequestValue: 1, + expectedLatencyCount: 5, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := &WebhookConversionMetrics{ + webhookConversionRequest: tt.fields.webhookConversionRequest, + webhookConversionLatency: tt.fields.webhookConversionLatency, + } + m.ObserveWebhookConversionFailure(tt.args.ctx, tt.args.elapsed, tt.args.errorType) + expectCounterValue(t, "webhook_conversion_requests", tt.wantLabels, tt.expectedRequestValue) + expectHistogramCountTotal(t, "webhook_conversion_duration_seconds", tt.wantLabels, tt.expectedRequestValue) + }) + } +} + +func expectCounterValue(t *testing.T, name string, labelFilter map[string]string, wantCount int) { + metrics, err := legacyregistry.DefaultGatherer.Gather() + if err != nil { + t.Fatalf("Failed to gather metrics: %s", err) + } + + counterSum := 0 + for _, mf := range metrics { + if mf.GetName() != name { + continue // Ignore other metrics. + } + for _, metric := range mf.GetMetric() { + if !testutil.LabelsMatch(metric, labelFilter) { + continue + } + counterSum += int(metric.GetCounter().GetValue()) + } + } + if wantCount != counterSum { + t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, counterSum, name, labelFilter) + for _, mf := range metrics { + if mf.GetName() == name { + for _, metric := range mf.GetMetric() { + t.Logf("\tnear match: %s", metric.String()) + } + } + } + } +} + +func expectHistogramCountTotal(t *testing.T, name string, labelFilter map[string]string, wantCount int) { + metrics, err := legacyregistry.DefaultGatherer.Gather() + if err != nil { + t.Fatalf("Failed to gather metrics: %s", err) + } + + counterSum := 0 + for _, mf := range metrics { + if mf.GetName() != name { + continue // Ignore other metrics. + } + for _, metric := range mf.GetMetric() { + if !testutil.LabelsMatch(metric, labelFilter) { + continue + } + counterSum += int(metric.GetHistogram().GetSampleCount()) + } + } + if wantCount != counterSum { + t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, counterSum, name, labelFilter) + for _, mf := range metrics { + if mf.GetName() == name { + for _, metric := range mf.GetMetric() { + t.Logf("\tnear match: %s\n", metric.String()) + } + } + } + } +} diff --git a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/webhook_converter.go b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/webhook_converter.go index e72454931fa..30047b4fd82 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/webhook_converter.go +++ b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/conversion/webhook_converter.go @@ -237,7 +237,7 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion) if isEmptyUnstructuredObject(in) { return c.nopConverter.Convert(in, toGV) } - + t := time.Now() listObj, isList := in.(*unstructured.UnstructuredList) requestUID := uuid.NewUUID() @@ -250,6 +250,7 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion) objCount := len(objectsToConvert) if objCount == 0 { + Metrics.ObserveWebhookConversionSuccess(ctx, time.Since(t)) // no objects needed conversion if !isList { // for a single item, return as-is @@ -275,16 +276,19 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion) r := c.restClient.Post().Body(request).Do(ctx) if err := r.Into(response); err != nil { // TODO: Return a webhook specific error to be able to convert it to meta.Status + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionCallFailure) return nil, fmt.Errorf("conversion webhook for %v failed: %v", in.GetObjectKind().GroupVersionKind(), err) } span.AddEvent("Request completed") convertedObjects, err := getConvertedObjectsFromResponse(requestUID, response) if err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionMalformedResponseFailure) return nil, fmt.Errorf("conversion webhook for %v failed: %v", in.GetObjectKind().GroupVersionKind(), err) } if len(convertedObjects) != len(objectsToConvert) { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionPartialResponseFailure) return nil, fmt.Errorf("conversion webhook for %v returned %d objects, expected %d", in.GetObjectKind().GroupVersionKind(), len(convertedObjects), len(objectsToConvert)) } @@ -302,62 +306,78 @@ func (c *webhookConverter) Convert(in runtime.Object, toGV schema.GroupVersion) } converted, err := getRawExtensionObject(convertedObjects[convertedIndex]) if err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: %v", in.GetObjectKind().GroupVersionKind(), convertedIndex, err) } convertedIndex++ if expected, got := toGV, converted.GetObjectKind().GroupVersionKind().GroupVersion(); expected != got { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: invalid groupVersion (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), convertedIndex, expected, got) } if expected, got := original.GetObjectKind().GroupVersionKind().Kind, converted.GetObjectKind().GroupVersionKind().Kind; expected != got { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: invalid kind (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), convertedIndex, expected, got) } unstructConverted, ok := converted.(*unstructured.Unstructured) if !ok { // this should not happened + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: invalid type, expected=Unstructured, got=%T", in.GetObjectKind().GroupVersionKind(), convertedIndex, converted) } if err := validateConvertedObject(original, unstructConverted); err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid converted object at index %v: %v", in.GetObjectKind().GroupVersionKind(), convertedIndex, err) } if err := restoreObjectMeta(original, unstructConverted); err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid metadata in object at index %v: %v", in.GetObjectKind().GroupVersionKind(), convertedIndex, err) } convertedList.Items[i] = *unstructConverted } convertedList.SetAPIVersion(toGV.String()) + Metrics.ObserveWebhookConversionSuccess(ctx, time.Since(t)) return convertedList, nil } if len(convertedObjects) != 1 { // This should not happened + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionNoObjectsReturnedFailure) return nil, fmt.Errorf("conversion webhook for %v failed, no objects returned", in.GetObjectKind()) } converted, err := getRawExtensionObject(convertedObjects[0]) if err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, err } if e, a := toGV, converted.GetObjectKind().GroupVersionKind().GroupVersion(); e != a { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid object at index 0: invalid groupVersion (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), e, a) } if e, a := in.GetObjectKind().GroupVersionKind().Kind, converted.GetObjectKind().GroupVersionKind().Kind; e != a { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid object at index 0: invalid kind (expected %v, received %v)", in.GetObjectKind().GroupVersionKind(), e, a) } unstructConverted, ok := converted.(*unstructured.Unstructured) if !ok { // this should not happened + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v failed, unexpected type %T at index 0", in.GetObjectKind().GroupVersionKind(), converted) } unstructIn, ok := in.(*unstructured.Unstructured) if !ok { // this should not happened + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v failed unexpected input type %T", in.GetObjectKind().GroupVersionKind(), in) } if err := validateConvertedObject(unstructIn, unstructConverted); err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid object: %v", in.GetObjectKind().GroupVersionKind(), err) } if err := restoreObjectMeta(unstructIn, unstructConverted); err != nil { + Metrics.ObserveWebhookConversionFailure(ctx, time.Since(t), WebhookConversionInvalidConvertedObjectFailure) return nil, fmt.Errorf("conversion webhook for %v returned invalid metadata: %v", in.GetObjectKind().GroupVersionKind(), err) } + Metrics.ObserveWebhookConversionSuccess(ctx, time.Since(t)) return converted, nil }