From f89bcffd55e205c610746c418062a305456cf29f Mon Sep 17 00:00:00 2001 From: Joseph Anttila Hall Date: Mon, 31 Oct 2022 12:59:26 -0700 Subject: [PATCH] egress_selector.go: Add a dial starts metric. Emit this metric before any potentially blocking dial work. --- .../server/egressselector/egress_selector.go | 1 + .../egressselector/egress_selector_test.go | 10 +++++++++ .../server/egressselector/metrics/metrics.go | 21 ++++++++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector.go b/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector.go index fcb4dd46b19..683b774febe 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector.go +++ b/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector.go @@ -243,6 +243,7 @@ func (d *dialerCreator) createDialer() utilnet.DialFunc { ctx, span := tracing.Start(ctx, fmt.Sprintf("Proxy via %s protocol over %s", d.options.protocol, d.options.transport), attribute.String("address", addr)) defer span.End(500 * time.Millisecond) start := egressmetrics.Metrics.Clock().Now() + egressmetrics.Metrics.ObserveDialStart(d.options.protocol, d.options.transport) proxier, err := d.connector.connect(ctx) if err != nil { egressmetrics.Metrics.ObserveDialFailure(d.options.protocol, d.options.transport, egressmetrics.StageConnect) diff --git a/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector_test.go b/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector_test.go index c5aaf6b6474..1ac555d2efd 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/egressselector/egress_selector_test.go @@ -201,6 +201,16 @@ func TestMetrics(t *testing.T) { metrics []string want string }{ + "connect to proxy server start": { + connectorErr: true, + proxierErr: true, + metrics: 
[]string{"apiserver_egress_dialer_dial_start_total"}, + want: ` + # HELP apiserver_egress_dialer_dial_start_total [ALPHA] Dial starts, labeled by the protocol (http-connect or grpc) and transport (tcp or uds). + # TYPE apiserver_egress_dialer_dial_start_total counter + apiserver_egress_dialer_dial_start_total{protocol="fake_protocol",transport="fake_transport"} 1 +`, + }, "connect to proxy server error": { connectorErr: true, proxierErr: false, diff --git a/staging/src/k8s.io/apiserver/pkg/server/egressselector/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/server/egressselector/metrics/metrics.go index af384618181..2e39947cd5e 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/egressselector/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/server/egressselector/metrics/metrics.go @@ -53,12 +53,24 @@ var ( // DialMetrics instruments dials to proxy server with prometheus metrics. type DialMetrics struct { clock clock.Clock + starts *metrics.CounterVec latencies *metrics.HistogramVec failures *metrics.CounterVec } // newDialMetrics create a new DialMetrics, configured with default metric names. 
func newDialMetrics() *DialMetrics { + starts := metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "dial_start_total", + Help: "Dial starts, labeled by the protocol (http-connect or grpc) and transport (tcp or uds).", + StabilityLevel: metrics.ALPHA, + }, + []string{"protocol", "transport"}, + ) + latencies := metrics.NewHistogramVec( &metrics.HistogramOpts{ Namespace: namespace, @@ -82,9 +94,10 @@ func newDialMetrics() *DialMetrics { []string{"protocol", "transport", "stage"}, ) + legacyregistry.MustRegister(starts) legacyregistry.MustRegister(latencies) legacyregistry.MustRegister(failures) - return &DialMetrics{latencies: latencies, failures: failures, clock: clock.RealClock{}} + return &DialMetrics{starts: starts, latencies: latencies, failures: failures, clock: clock.RealClock{}} } // Clock returns the clock. @@ -99,10 +112,16 @@ func (m *DialMetrics) SetClock(c clock.Clock) { // Reset resets the metrics. func (m *DialMetrics) Reset() { + m.starts.Reset() m.latencies.Reset() m.failures.Reset() } +// ObserveDialStart records the start of a dial attempt, labeled by protocol and transport. +func (m *DialMetrics) ObserveDialStart(protocol, transport string) { + m.starts.WithLabelValues(protocol, transport).Inc() +} + // ObserveDialLatency records the latency of a dial, labeled by protocol, transport. func (m *DialMetrics) ObserveDialLatency(elapsed time.Duration, protocol, transport string) { m.latencies.WithLabelValues(protocol, transport).Observe(elapsed.Seconds())