mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-08 11:38:15 +00:00
apiserver/etcd: add request and request_error metrics (#117222)
* apiserver/etcd: add request and request_error metrics Signed-off-by: iyear <ljyngup@gmail.com> * apiserver/etcd: rename etcdRequestCounts metric Signed-off-by: iyear <ljyngup@gmail.com> --------- Signed-off-by: iyear <ljyngup@gmail.com>
This commit is contained in:
parent
d09842e0ad
commit
4f6714ef42
@ -47,6 +47,22 @@ var (
|
||||
},
|
||||
[]string{"operation", "type"},
|
||||
)
|
||||
etcdRequestCounts = compbasemetrics.NewCounterVec(
|
||||
&compbasemetrics.CounterOpts{
|
||||
Name: "etcd_requests_total",
|
||||
Help: "Etcd request counts for each operation and object type.",
|
||||
StabilityLevel: compbasemetrics.ALPHA,
|
||||
},
|
||||
[]string{"operation", "type"},
|
||||
)
|
||||
etcdRequestErrorCounts = compbasemetrics.NewCounterVec(
|
||||
&compbasemetrics.CounterOpts{
|
||||
Name: "etcd_request_errors_total",
|
||||
Help: "Etcd failed request counts for each operation and object type.",
|
||||
StabilityLevel: compbasemetrics.ALPHA,
|
||||
},
|
||||
[]string{"operation", "type"},
|
||||
)
|
||||
objectCounts = compbasemetrics.NewGaugeVec(
|
||||
&compbasemetrics.GaugeOpts{
|
||||
Name: "apiserver_storage_objects",
|
||||
@ -140,6 +156,8 @@ func Register() {
|
||||
// Register the metrics.
|
||||
registerMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(etcdRequestLatency)
|
||||
legacyregistry.MustRegister(etcdRequestCounts)
|
||||
legacyregistry.MustRegister(etcdRequestErrorCounts)
|
||||
legacyregistry.MustRegister(objectCounts)
|
||||
legacyregistry.MustRegister(dbTotalSize)
|
||||
legacyregistry.MustRegister(etcdBookmarkCounts)
|
||||
@ -157,9 +175,15 @@ func UpdateObjectCount(resourcePrefix string, count int64) {
|
||||
objectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
|
||||
}
|
||||
|
||||
// RecordEtcdRequestLatency sets the etcd_request_duration_seconds metrics.
|
||||
func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) {
|
||||
etcdRequestLatency.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime))
|
||||
// RecordEtcdRequest updates and sets the etcd_request_duration_seconds,
|
||||
// etcd_request_total, etcd_request_errors_total metrics.
|
||||
func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) {
|
||||
v := []string{verb, resource}
|
||||
etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime))
|
||||
etcdRequestCounts.WithLabelValues(v...).Inc()
|
||||
if err != nil {
|
||||
etcdRequestErrorCounts.WithLabelValues(v...).Inc()
|
||||
}
|
||||
}
|
||||
|
||||
// RecordEtcdEvent updated the etcd_events_received_total metric.
|
||||
@ -183,7 +207,9 @@ func Reset() {
|
||||
}
|
||||
|
||||
// sinceInSeconds gets the time since the specified start in seconds.
|
||||
func sinceInSeconds(start time.Time) float64 {
|
||||
//
|
||||
// This is a variable to facilitate testing.
|
||||
var sinceInSeconds = func(start time.Time) float64 {
|
||||
return time.Since(start).Seconds()
|
||||
}
|
||||
|
||||
|
@ -17,8 +17,10 @@ limitations under the License.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
@ -55,3 +57,125 @@ func TestRecordDecodeError(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordEtcdRequest(t *testing.T) {
|
||||
registry := metrics.NewKubeRegistry()
|
||||
|
||||
// modify default sinceInSeconds to constant NOW
|
||||
sinceInSeconds = func(t time.Time) float64 {
|
||||
return time.Unix(0, 300*int64(time.Millisecond)).Sub(t).Seconds()
|
||||
}
|
||||
|
||||
testedMetrics := []metrics.Registerable{
|
||||
etcdRequestCounts,
|
||||
etcdRequestErrorCounts,
|
||||
etcdRequestLatency,
|
||||
}
|
||||
|
||||
testedMetricsName := make([]string, 0, len(testedMetrics))
|
||||
for _, m := range testedMetrics {
|
||||
registry.MustRegister(m)
|
||||
testedMetricsName = append(testedMetricsName, m.FQName())
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
desc string
|
||||
operation string
|
||||
typ string
|
||||
err error
|
||||
startTime time.Time
|
||||
want string
|
||||
}{
|
||||
{
|
||||
desc: "success_request",
|
||||
operation: "foo",
|
||||
typ: "bar",
|
||||
err: nil,
|
||||
startTime: time.Unix(0, 0), // 0.3s
|
||||
want: `# HELP etcd_request_duration_seconds [ALPHA] Etcd request latency in seconds for each operation and object type.
|
||||
# TYPE etcd_request_duration_seconds histogram
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.005"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.025"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.05"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.1"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.2"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.4"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.6"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.8"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.25"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.5"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="2"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="3"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="4"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="5"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="6"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="8"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="10"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="15"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="20"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="30"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="45"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="60"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="+Inf"} 1
|
||||
etcd_request_duration_seconds_sum{operation="foo",type="bar"} 0.3
|
||||
etcd_request_duration_seconds_count{operation="foo",type="bar"} 1
|
||||
# HELP etcd_requests_total [ALPHA] Etcd request counts for each operation and object type.
|
||||
# TYPE etcd_requests_total counter
|
||||
etcd_requests_total{operation="foo",type="bar"} 1
|
||||
`,
|
||||
},
|
||||
{
|
||||
desc: "failed_request",
|
||||
operation: "foo",
|
||||
typ: "bar",
|
||||
err: errors.New("some error"),
|
||||
startTime: time.Unix(0, 0), // 0.3s
|
||||
want: `# HELP etcd_request_duration_seconds [ALPHA] Etcd request latency in seconds for each operation and object type.
|
||||
# TYPE etcd_request_duration_seconds histogram
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.005"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.025"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.05"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.1"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.2"} 0
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.4"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.6"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.8"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.25"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.5"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="2"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="3"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="4"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="5"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="6"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="8"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="10"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="15"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="20"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="30"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="45"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="60"} 1
|
||||
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="+Inf"} 1
|
||||
etcd_request_duration_seconds_sum{operation="foo",type="bar"} 0.3
|
||||
etcd_request_duration_seconds_count{operation="foo",type="bar"} 1
|
||||
# HELP etcd_requests_total [ALPHA] Etcd request counts for each operation and object type.
|
||||
# TYPE etcd_requests_total counter
|
||||
etcd_requests_total{operation="foo",type="bar"} 1
|
||||
# HELP etcd_request_errors_total [ALPHA] Etcd failed request counts for each operation and object type.
|
||||
# TYPE etcd_request_errors_total counter
|
||||
etcd_request_errors_total{operation="foo",type="bar"} 1
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
defer registry.Reset()
|
||||
RecordEtcdRequest(test.operation, test.typ, test.err, test.startTime)
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(test.want), testedMetricsName...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ func (s *store) Get(ctx context.Context, key string, opts storage.GetOptions, ou
|
||||
}
|
||||
startTime := time.Now()
|
||||
getResp, err := s.client.KV.Get(ctx, preparedKey)
|
||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
||||
metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -210,7 +210,7 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object,
|
||||
).Then(
|
||||
clientv3.OpPut(preparedKey, string(newData), opts...),
|
||||
).Commit()
|
||||
metrics.RecordEtcdRequestLatency("create", s.groupResourceString, startTime)
|
||||
metrics.RecordEtcdRequest("create", s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
span.AddEvent("Txn call failed", attribute.String("err", err.Error()))
|
||||
return err
|
||||
@ -255,7 +255,7 @@ func (s *store) conditionalDelete(
|
||||
getCurrentState := func() (*objState, error) {
|
||||
startTime := time.Now()
|
||||
getResp, err := s.client.KV.Get(ctx, key)
|
||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
||||
metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -337,7 +337,7 @@ func (s *store) conditionalDelete(
|
||||
).Else(
|
||||
clientv3.OpGet(key),
|
||||
).Commit()
|
||||
metrics.RecordEtcdRequestLatency("delete", s.groupResourceString, startTime)
|
||||
metrics.RecordEtcdRequest("delete", s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -391,7 +391,7 @@ func (s *store) GuaranteedUpdate(
|
||||
getCurrentState := func() (*objState, error) {
|
||||
startTime := time.Now()
|
||||
getResp, err := s.client.KV.Get(ctx, preparedKey)
|
||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
||||
metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -512,7 +512,7 @@ func (s *store) GuaranteedUpdate(
|
||||
).Else(
|
||||
clientv3.OpGet(preparedKey),
|
||||
).Commit()
|
||||
metrics.RecordEtcdRequestLatency("update", s.groupResourceString, startTime)
|
||||
metrics.RecordEtcdRequest("update", s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
span.AddEvent("Txn call failed", attribute.String("err", err.Error()))
|
||||
return err
|
||||
@ -575,7 +575,7 @@ func (s *store) Count(key string) (int64, error) {
|
||||
|
||||
startTime := time.Now()
|
||||
getResp, err := s.client.KV.Get(context.Background(), preparedKey, clientv3.WithRange(clientv3.GetPrefixRangeEnd(preparedKey)), clientv3.WithCountOnly())
|
||||
metrics.RecordEtcdRequestLatency("listWithCount", preparedKey, startTime)
|
||||
metrics.RecordEtcdRequest("listWithCount", preparedKey, err, startTime)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@ -720,14 +720,16 @@ func (s *store) GetList(ctx context.Context, key string, opts storage.ListOption
|
||||
numReturn := v.Len()
|
||||
metrics.RecordStorageListMetrics(s.groupResourceString, numFetched, numEvald, numReturn)
|
||||
}()
|
||||
|
||||
metricsOp := "get"
|
||||
if recursive {
|
||||
metricsOp = "list"
|
||||
}
|
||||
|
||||
for {
|
||||
startTime := time.Now()
|
||||
getResp, err = s.client.KV.Get(ctx, preparedKey, options...)
|
||||
if recursive {
|
||||
metrics.RecordEtcdRequestLatency("list", s.groupResourceString, startTime)
|
||||
} else {
|
||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
||||
}
|
||||
metrics.RecordEtcdRequest(metricsOp, s.groupResourceString, err, startTime)
|
||||
if err != nil {
|
||||
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user