egress_selector.go: Add a dial starts metric.

Emit this metric before any potentially blocking dial work.
This commit is contained in:
Joseph Anttila Hall 2022-10-31 12:59:26 -07:00
parent 4a3bac5eae
commit f89bcffd55
3 changed files with 31 additions and 1 deletions

View File

@ -243,6 +243,7 @@ func (d *dialerCreator) createDialer() utilnet.DialFunc {
ctx, span := tracing.Start(ctx, fmt.Sprintf("Proxy via %s protocol over %s", d.options.protocol, d.options.transport), attribute.String("address", addr)) ctx, span := tracing.Start(ctx, fmt.Sprintf("Proxy via %s protocol over %s", d.options.protocol, d.options.transport), attribute.String("address", addr))
defer span.End(500 * time.Millisecond) defer span.End(500 * time.Millisecond)
start := egressmetrics.Metrics.Clock().Now() start := egressmetrics.Metrics.Clock().Now()
egressmetrics.Metrics.ObserveDialStart(d.options.protocol, d.options.transport)
proxier, err := d.connector.connect(ctx) proxier, err := d.connector.connect(ctx)
if err != nil { if err != nil {
egressmetrics.Metrics.ObserveDialFailure(d.options.protocol, d.options.transport, egressmetrics.StageConnect) egressmetrics.Metrics.ObserveDialFailure(d.options.protocol, d.options.transport, egressmetrics.StageConnect)

View File

@ -201,6 +201,16 @@ func TestMetrics(t *testing.T) {
metrics []string metrics []string
want string want string
}{ }{
"connect to proxy server start": {
connectorErr: true,
proxierErr: true,
metrics: []string{"apiserver_egress_dialer_dial_start_total"},
want: `
# HELP apiserver_egress_dialer_dial_start_total [ALPHA] Dial starts, labeled by the protocol (http-connect or grpc) and transport (tcp or uds).
# TYPE apiserver_egress_dialer_dial_start_total counter
apiserver_egress_dialer_dial_start_total{protocol="fake_protocol",transport="fake_transport"} 1
`,
},
"connect to proxy server error": { "connect to proxy server error": {
connectorErr: true, connectorErr: true,
proxierErr: false, proxierErr: false,

View File

@ -53,12 +53,24 @@ var (
// DialMetrics instruments dials to proxy server with prometheus metrics. // DialMetrics instruments dials to proxy server with prometheus metrics.
type DialMetrics struct { type DialMetrics struct {
clock clock.Clock clock clock.Clock
starts *metrics.CounterVec
latencies *metrics.HistogramVec latencies *metrics.HistogramVec
failures *metrics.CounterVec failures *metrics.CounterVec
} }
// newDialMetrics creates a new DialMetrics, configured with default metric names. // newDialMetrics creates a new DialMetrics, configured with default metric names.
func newDialMetrics() *DialMetrics { func newDialMetrics() *DialMetrics {
starts := metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "dial_start_total",
Help: "Dial starts, labeled by the protocol (http-connect or grpc) and transport (tcp or uds).",
StabilityLevel: metrics.ALPHA,
},
[]string{"protocol", "transport"},
)
latencies := metrics.NewHistogramVec( latencies := metrics.NewHistogramVec(
&metrics.HistogramOpts{ &metrics.HistogramOpts{
Namespace: namespace, Namespace: namespace,
@ -82,9 +94,10 @@ func newDialMetrics() *DialMetrics {
[]string{"protocol", "transport", "stage"}, []string{"protocol", "transport", "stage"},
) )
legacyregistry.MustRegister(starts)
legacyregistry.MustRegister(latencies) legacyregistry.MustRegister(latencies)
legacyregistry.MustRegister(failures) legacyregistry.MustRegister(failures)
return &DialMetrics{latencies: latencies, failures: failures, clock: clock.RealClock{}} return &DialMetrics{starts: starts, latencies: latencies, failures: failures, clock: clock.RealClock{}}
} }
// Clock returns the clock. // Clock returns the clock.
@ -99,10 +112,16 @@ func (m *DialMetrics) SetClock(c clock.Clock) {
// Reset resets the metrics. // Reset resets the metrics.
func (m *DialMetrics) Reset() { func (m *DialMetrics) Reset() {
m.starts.Reset()
m.latencies.Reset() m.latencies.Reset()
m.failures.Reset() m.failures.Reset()
} }
// ObserveDialStart records the start of a dial attempt by incrementing the
// starts counter for the given protocol and transport label values.
func (m *DialMetrics) ObserveDialStart(protocol, transport string) {
m.starts.WithLabelValues(protocol, transport).Inc()
}
// ObserveDialLatency records the latency of a dial, labeled by protocol, transport. // ObserveDialLatency records the latency of a dial, labeled by protocol, transport.
func (m *DialMetrics) ObserveDialLatency(elapsed time.Duration, protocol, transport string) { func (m *DialMetrics) ObserveDialLatency(elapsed time.Duration, protocol, transport string) {
m.latencies.WithLabelValues(protocol, transport).Observe(elapsed.Seconds()) m.latencies.WithLabelValues(protocol, transport).Observe(elapsed.Seconds())