From 83171562b0954b2e19eb69943f01a44779cc7a8f Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Wed, 15 Sep 2021 16:40:44 +0200 Subject: [PATCH] etcd-client starts retrying transient errors from the etcd cluster This PR enables unaryClientInterceptor in conjunction with Prometheus interceptor. Previously it was simply overwritten by the Prometheus interceptor. As a result etcd client didn't attempt to retry certain errors. The unaryClientInterceptor is important because it knows how to retry all sorts of errors from the etcd cluster. It will make the API server more resilient to failures - end users won't see certain errors. The full list of retriable (codes.Unavailable) errors can be found at https://github.com/etcd-io/etcd/blob/main/api/v3rpc/rpctypes/error.go#L72 --- .../pkg/storage/storagebackend/factory/etcd3.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go b/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go index 66f8e52144c..63e18699d6b 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go @@ -137,8 +137,13 @@ func newETCD3Client(c storagebackend.TransportConfig) (*clientv3.Client, error) } dialOptions := []grpc.DialOption{ grpc.WithBlock(), // block until the underlying connection is up - grpc.WithUnaryInterceptor(grpcprom.UnaryClientInterceptor), - grpc.WithStreamInterceptor(grpcprom.StreamClientInterceptor), + // use chained interceptors so that the default (retry and backoff) interceptors are added. + // otherwise they will be overwritten by the metric interceptor. + // + // these optional interceptors will be placed after the default ones. + // which seems to be what we want as the metrics will be collected on each attempt (retry) + grpc.WithChainUnaryInterceptor(grpcprom.UnaryClientInterceptor), + grpc.WithChainStreamInterceptor(grpcprom.StreamClientInterceptor), } if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.APIServerTracing) { tracingOpts := []otelgrpc.Option{