mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-09 20:17:41 +00:00
apiserver/etcd: add request and request_error metrics (#117222)
* apiserver/etcd: add request and request_error metrics Signed-off-by: iyear <ljyngup@gmail.com> * apiserver/etcd: rename etcdRequestCounts metric Signed-off-by: iyear <ljyngup@gmail.com> --------- Signed-off-by: iyear <ljyngup@gmail.com>
This commit is contained in:
parent
d09842e0ad
commit
4f6714ef42
@ -47,6 +47,22 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"operation", "type"},
|
[]string{"operation", "type"},
|
||||||
)
|
)
|
||||||
|
etcdRequestCounts = compbasemetrics.NewCounterVec(
|
||||||
|
&compbasemetrics.CounterOpts{
|
||||||
|
Name: "etcd_requests_total",
|
||||||
|
Help: "Etcd request counts for each operation and object type.",
|
||||||
|
StabilityLevel: compbasemetrics.ALPHA,
|
||||||
|
},
|
||||||
|
[]string{"operation", "type"},
|
||||||
|
)
|
||||||
|
etcdRequestErrorCounts = compbasemetrics.NewCounterVec(
|
||||||
|
&compbasemetrics.CounterOpts{
|
||||||
|
Name: "etcd_request_errors_total",
|
||||||
|
Help: "Etcd failed request counts for each operation and object type.",
|
||||||
|
StabilityLevel: compbasemetrics.ALPHA,
|
||||||
|
},
|
||||||
|
[]string{"operation", "type"},
|
||||||
|
)
|
||||||
objectCounts = compbasemetrics.NewGaugeVec(
|
objectCounts = compbasemetrics.NewGaugeVec(
|
||||||
&compbasemetrics.GaugeOpts{
|
&compbasemetrics.GaugeOpts{
|
||||||
Name: "apiserver_storage_objects",
|
Name: "apiserver_storage_objects",
|
||||||
@ -140,6 +156,8 @@ func Register() {
|
|||||||
// Register the metrics.
|
// Register the metrics.
|
||||||
registerMetrics.Do(func() {
|
registerMetrics.Do(func() {
|
||||||
legacyregistry.MustRegister(etcdRequestLatency)
|
legacyregistry.MustRegister(etcdRequestLatency)
|
||||||
|
legacyregistry.MustRegister(etcdRequestCounts)
|
||||||
|
legacyregistry.MustRegister(etcdRequestErrorCounts)
|
||||||
legacyregistry.MustRegister(objectCounts)
|
legacyregistry.MustRegister(objectCounts)
|
||||||
legacyregistry.MustRegister(dbTotalSize)
|
legacyregistry.MustRegister(dbTotalSize)
|
||||||
legacyregistry.MustRegister(etcdBookmarkCounts)
|
legacyregistry.MustRegister(etcdBookmarkCounts)
|
||||||
@ -157,9 +175,15 @@ func UpdateObjectCount(resourcePrefix string, count int64) {
|
|||||||
objectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
|
objectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
|
||||||
}
|
}
|
||||||
|
|
||||||
// RecordEtcdRequestLatency sets the etcd_request_duration_seconds metrics.
|
// RecordEtcdRequest updates and sets the etcd_request_duration_seconds,
|
||||||
func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) {
|
// etcd_request_total, etcd_request_errors_total metrics.
|
||||||
etcdRequestLatency.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime))
|
func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) {
|
||||||
|
v := []string{verb, resource}
|
||||||
|
etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime))
|
||||||
|
etcdRequestCounts.WithLabelValues(v...).Inc()
|
||||||
|
if err != nil {
|
||||||
|
etcdRequestErrorCounts.WithLabelValues(v...).Inc()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RecordEtcdEvent updated the etcd_events_received_total metric.
|
// RecordEtcdEvent updated the etcd_events_received_total metric.
|
||||||
@ -183,7 +207,9 @@ func Reset() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sinceInSeconds gets the time since the specified start in seconds.
|
// sinceInSeconds gets the time since the specified start in seconds.
|
||||||
func sinceInSeconds(start time.Time) float64 {
|
//
|
||||||
|
// This is a variable to facilitate testing.
|
||||||
|
var sinceInSeconds = func(start time.Time) float64 {
|
||||||
return time.Since(start).Seconds()
|
return time.Since(start).Seconds()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,8 +17,10 @@ limitations under the License.
|
|||||||
package metrics
|
package metrics
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"k8s.io/component-base/metrics"
|
"k8s.io/component-base/metrics"
|
||||||
"k8s.io/component-base/metrics/testutil"
|
"k8s.io/component-base/metrics/testutil"
|
||||||
@ -55,3 +57,125 @@ func TestRecordDecodeError(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRecordEtcdRequest(t *testing.T) {
|
||||||
|
registry := metrics.NewKubeRegistry()
|
||||||
|
|
||||||
|
// modify default sinceInSeconds to constant NOW
|
||||||
|
sinceInSeconds = func(t time.Time) float64 {
|
||||||
|
return time.Unix(0, 300*int64(time.Millisecond)).Sub(t).Seconds()
|
||||||
|
}
|
||||||
|
|
||||||
|
testedMetrics := []metrics.Registerable{
|
||||||
|
etcdRequestCounts,
|
||||||
|
etcdRequestErrorCounts,
|
||||||
|
etcdRequestLatency,
|
||||||
|
}
|
||||||
|
|
||||||
|
testedMetricsName := make([]string, 0, len(testedMetrics))
|
||||||
|
for _, m := range testedMetrics {
|
||||||
|
registry.MustRegister(m)
|
||||||
|
testedMetricsName = append(testedMetricsName, m.FQName())
|
||||||
|
}
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
desc string
|
||||||
|
operation string
|
||||||
|
typ string
|
||||||
|
err error
|
||||||
|
startTime time.Time
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "success_request",
|
||||||
|
operation: "foo",
|
||||||
|
typ: "bar",
|
||||||
|
err: nil,
|
||||||
|
startTime: time.Unix(0, 0), // 0.3s
|
||||||
|
want: `# HELP etcd_request_duration_seconds [ALPHA] Etcd request latency in seconds for each operation and object type.
|
||||||
|
# TYPE etcd_request_duration_seconds histogram
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.005"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.025"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.05"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.1"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.2"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.4"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.6"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.8"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.25"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.5"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="2"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="3"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="4"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="5"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="6"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="8"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="10"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="15"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="20"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="30"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="45"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="60"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="+Inf"} 1
|
||||||
|
etcd_request_duration_seconds_sum{operation="foo",type="bar"} 0.3
|
||||||
|
etcd_request_duration_seconds_count{operation="foo",type="bar"} 1
|
||||||
|
# HELP etcd_requests_total [ALPHA] Etcd request counts for each operation and object type.
|
||||||
|
# TYPE etcd_requests_total counter
|
||||||
|
etcd_requests_total{operation="foo",type="bar"} 1
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "failed_request",
|
||||||
|
operation: "foo",
|
||||||
|
typ: "bar",
|
||||||
|
err: errors.New("some error"),
|
||||||
|
startTime: time.Unix(0, 0), // 0.3s
|
||||||
|
want: `# HELP etcd_request_duration_seconds [ALPHA] Etcd request latency in seconds for each operation and object type.
|
||||||
|
# TYPE etcd_request_duration_seconds histogram
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.005"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.025"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.05"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.1"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.2"} 0
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.4"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.6"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.8"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.25"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.5"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="2"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="3"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="4"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="5"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="6"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="8"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="10"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="15"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="20"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="30"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="45"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="60"} 1
|
||||||
|
etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="+Inf"} 1
|
||||||
|
etcd_request_duration_seconds_sum{operation="foo",type="bar"} 0.3
|
||||||
|
etcd_request_duration_seconds_count{operation="foo",type="bar"} 1
|
||||||
|
# HELP etcd_requests_total [ALPHA] Etcd request counts for each operation and object type.
|
||||||
|
# TYPE etcd_requests_total counter
|
||||||
|
etcd_requests_total{operation="foo",type="bar"} 1
|
||||||
|
# HELP etcd_request_errors_total [ALPHA] Etcd failed request counts for each operation and object type.
|
||||||
|
# TYPE etcd_request_errors_total counter
|
||||||
|
etcd_request_errors_total{operation="foo",type="bar"} 1
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range testCases {
|
||||||
|
t.Run(test.desc, func(t *testing.T) {
|
||||||
|
defer registry.Reset()
|
||||||
|
RecordEtcdRequest(test.operation, test.typ, test.err, test.startTime)
|
||||||
|
if err := testutil.GatherAndCompare(registry, strings.NewReader(test.want), testedMetricsName...); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -136,7 +136,7 @@ func (s *store) Get(ctx context.Context, key string, opts storage.GetOptions, ou
|
|||||||
}
|
}
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
getResp, err := s.client.KV.Get(ctx, preparedKey)
|
getResp, err := s.client.KV.Get(ctx, preparedKey)
|
||||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -210,7 +210,7 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object,
|
|||||||
).Then(
|
).Then(
|
||||||
clientv3.OpPut(preparedKey, string(newData), opts...),
|
clientv3.OpPut(preparedKey, string(newData), opts...),
|
||||||
).Commit()
|
).Commit()
|
||||||
metrics.RecordEtcdRequestLatency("create", s.groupResourceString, startTime)
|
metrics.RecordEtcdRequest("create", s.groupResourceString, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
span.AddEvent("Txn call failed", attribute.String("err", err.Error()))
|
span.AddEvent("Txn call failed", attribute.String("err", err.Error()))
|
||||||
return err
|
return err
|
||||||
@ -255,7 +255,7 @@ func (s *store) conditionalDelete(
|
|||||||
getCurrentState := func() (*objState, error) {
|
getCurrentState := func() (*objState, error) {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
getResp, err := s.client.KV.Get(ctx, key)
|
getResp, err := s.client.KV.Get(ctx, key)
|
||||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -337,7 +337,7 @@ func (s *store) conditionalDelete(
|
|||||||
).Else(
|
).Else(
|
||||||
clientv3.OpGet(key),
|
clientv3.OpGet(key),
|
||||||
).Commit()
|
).Commit()
|
||||||
metrics.RecordEtcdRequestLatency("delete", s.groupResourceString, startTime)
|
metrics.RecordEtcdRequest("delete", s.groupResourceString, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -391,7 +391,7 @@ func (s *store) GuaranteedUpdate(
|
|||||||
getCurrentState := func() (*objState, error) {
|
getCurrentState := func() (*objState, error) {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
getResp, err := s.client.KV.Get(ctx, preparedKey)
|
getResp, err := s.client.KV.Get(ctx, preparedKey)
|
||||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -512,7 +512,7 @@ func (s *store) GuaranteedUpdate(
|
|||||||
).Else(
|
).Else(
|
||||||
clientv3.OpGet(preparedKey),
|
clientv3.OpGet(preparedKey),
|
||||||
).Commit()
|
).Commit()
|
||||||
metrics.RecordEtcdRequestLatency("update", s.groupResourceString, startTime)
|
metrics.RecordEtcdRequest("update", s.groupResourceString, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
span.AddEvent("Txn call failed", attribute.String("err", err.Error()))
|
span.AddEvent("Txn call failed", attribute.String("err", err.Error()))
|
||||||
return err
|
return err
|
||||||
@ -575,7 +575,7 @@ func (s *store) Count(key string) (int64, error) {
|
|||||||
|
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
getResp, err := s.client.KV.Get(context.Background(), preparedKey, clientv3.WithRange(clientv3.GetPrefixRangeEnd(preparedKey)), clientv3.WithCountOnly())
|
getResp, err := s.client.KV.Get(context.Background(), preparedKey, clientv3.WithRange(clientv3.GetPrefixRangeEnd(preparedKey)), clientv3.WithCountOnly())
|
||||||
metrics.RecordEtcdRequestLatency("listWithCount", preparedKey, startTime)
|
metrics.RecordEtcdRequest("listWithCount", preparedKey, err, startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
@ -720,14 +720,16 @@ func (s *store) GetList(ctx context.Context, key string, opts storage.ListOption
|
|||||||
numReturn := v.Len()
|
numReturn := v.Len()
|
||||||
metrics.RecordStorageListMetrics(s.groupResourceString, numFetched, numEvald, numReturn)
|
metrics.RecordStorageListMetrics(s.groupResourceString, numFetched, numEvald, numReturn)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
metricsOp := "get"
|
||||||
|
if recursive {
|
||||||
|
metricsOp = "list"
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
getResp, err = s.client.KV.Get(ctx, preparedKey, options...)
|
getResp, err = s.client.KV.Get(ctx, preparedKey, options...)
|
||||||
if recursive {
|
metrics.RecordEtcdRequest(metricsOp, s.groupResourceString, err, startTime)
|
||||||
metrics.RecordEtcdRequestLatency("list", s.groupResourceString, startTime)
|
|
||||||
} else {
|
|
||||||
metrics.RecordEtcdRequestLatency("get", s.groupResourceString, startTime)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
|
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user