From 0c81eabb853e581abbcb37ebf094af3316e1012e Mon Sep 17 00:00:00 2001 From: Nic Cope Date: Thu, 28 Jul 2022 19:51:55 -0700 Subject: [PATCH] Share a single etcd3 client logger across all clients Currently the API server creates one etcd client per CRD. If clients aren't provided a logger they'll each create their own. These loggers can account for ~20% of API server memory consumption on a cluster with hundreds of CRDs. Signed-off-by: Nic Cope --- .../storage/storagebackend/factory/etcd3.go | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go b/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go index 214e930140b..72473d61c57 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go @@ -27,10 +27,12 @@ import ( "time" grpcprom "github.com/grpc-ecosystem/go-grpc-prometheus" + "go.etcd.io/etcd/client/pkg/v3/logutil" "go.etcd.io/etcd/client/pkg/v3/transport" clientv3 "go.etcd.io/etcd/client/v3" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.uber.org/zap" + "go.uber.org/zap/zapcore" "google.golang.org/grpc" "k8s.io/apimachinery/pkg/runtime" @@ -64,6 +66,14 @@ const ( dbMetricsMonitorJitter = 0.5 ) +// TODO(negz): Stop using a package scoped logger. At the time of writing we're +// creating an etcd client for each CRD. We need to pass each etcd client a +// logger or each client will create its own, which comes with a significant +// memory cost (around 20% of the API server's memory when hundreds of CRDs are +// present). The correct fix here is to not create a client per CRD. See +// https://github.com/kubernetes/kubernetes/issues/111476 for more. +var etcd3ClientLogger *zap.Logger + func init() { // grpcprom auto-registers (via an init function) their client metrics, since we are opting out of // using the global prometheus registry and using our own wrapped global registry, @@ -71,6 +81,12 @@ func init() { // For reference: https://github.com/kubernetes/kubernetes/pull/81387 legacyregistry.RawMustRegister(grpcprom.DefaultClientMetrics) dbMetricsMonitors = make(map[string]struct{}) + + l, err := logutil.CreateDefaultZapLogger(zapcore.InfoLevel) + if err != nil { + l = zap.NewNop() + } + etcd3ClientLogger = l } func newETCD3HealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) { @@ -217,6 +233,7 @@ var newETCD3Client = func(c storagebackend.TransportConfig) (*clientv3.Client, e } dialOptions = append(dialOptions, grpc.WithContextDialer(dialer)) } + cfg := clientv3.Config{ DialTimeout: dialTimeout, DialKeepAliveTime: keepaliveTime, @@ -224,10 +241,7 @@ var newETCD3Client = func(c storagebackend.TransportConfig) (*clientv3.Client, e DialOptions: dialOptions, Endpoints: c.ServerList, TLS: tlsConfig, - - // This logger uses a significant amount of memory when many CRDs (i.e. - // 1,000+) are added to the API server, so we disable it. - Logger: zap.NewNop(), + Logger: etcd3ClientLogger, } return clientv3.New(cfg)