mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 11:13:48 +00:00
Merge pull request #8777 from fgrzadkowski/print_top_latency
Print top 5 latency metrics even if they are below threshold
This commit is contained in:
commit
702a5e27e9
@ -25,6 +25,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -1024,13 +1025,21 @@ func getSigner(provider string) (ssh.Signer, error) {
|
|||||||
// LatencyMetric stores data about request latency at a given quantile
|
// LatencyMetric stores data about request latency at a given quantile
|
||||||
// broken down by verb (e.g. GET, PUT, LIST) and resource (e.g. pods, services).
|
// broken down by verb (e.g. GET, PUT, LIST) and resource (e.g. pods, services).
|
||||||
type LatencyMetric struct {
|
type LatencyMetric struct {
|
||||||
verb string
|
Verb string
|
||||||
resource string
|
Resource string
|
||||||
// 0 <= quantile <= 1, e.g. 0.95 is the 95th percentile, 0.5 is the median.
|
// 0 <= quantile <= 1, e.g. 0.95 is the 95th percentile, 0.5 is the median.
|
||||||
quantile float64
|
Quantile float64
|
||||||
latency time.Duration
|
Latency time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LatencyMetricByLatency implements sort.Interface for []LatencyMetric based on
|
||||||
|
// the Latency field.
|
||||||
|
type LatencyMetricByLatency []LatencyMetric
|
||||||
|
|
||||||
|
// Len reports how many metrics the slice holds; it is the length half of
// the sort.Interface contract.
func (a LatencyMetricByLatency) Len() int {
	return len(a)
}
|
||||||
|
// Swap exchanges the metrics stored at positions i and j in place; it is the
// element-exchange half of the sort.Interface contract.
func (a LatencyMetricByLatency) Swap(i, j int) {
	held := a[i]
	a[i] = a[j]
	a[j] = held
}
|
||||||
|
// Less reports whether the metric at index i has a strictly smaller Latency
// than the metric at index j; it is the ordering half of the sort.Interface
// contract.
func (a LatencyMetricByLatency) Less(i, j int) bool {
	left, right := a[i].Latency, a[j].Latency
	return right > left
}
|
||||||
|
|
||||||
func ReadLatencyMetrics(c *client.Client) ([]LatencyMetric, error) {
|
func ReadLatencyMetrics(c *client.Client) ([]LatencyMetric, error) {
|
||||||
body, err := c.Get().AbsPath("/metrics").DoRaw()
|
body, err := c.Get().AbsPath("/metrics").DoRaw()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -1075,16 +1084,28 @@ func HighLatencyRequests(c *client.Client, threshold time.Duration, ignoredResou
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
sort.Sort(sort.Reverse(LatencyMetricByLatency(metrics)))
|
||||||
var badMetrics []LatencyMetric
|
var badMetrics []LatencyMetric
|
||||||
|
top := 5
|
||||||
for _, metric := range metrics {
|
for _, metric := range metrics {
|
||||||
if !ignoredResources.Has(metric.resource) &&
|
if ignoredResources.Has(metric.Resource) || ignoredVerbs.Has(metric.Verb) {
|
||||||
!ignoredVerbs.Has(metric.verb) &&
|
continue
|
||||||
|
}
|
||||||
|
isBad := false
|
||||||
|
if metric.Latency > threshold &&
|
||||||
// We are only interested in 99%tile, but for logging purposes
|
// We are only interested in 99%tile, but for logging purposes
|
||||||
// it's useful to have all the offending percentiles.
|
// it's useful to have all the offending percentiles.
|
||||||
metric.quantile <= 0.99 &&
|
metric.Quantile <= 0.99 {
|
||||||
metric.latency > threshold {
|
|
||||||
Logf("WARNING - requests with too high latency: %+v", metric)
|
|
||||||
badMetrics = append(badMetrics, metric)
|
badMetrics = append(badMetrics, metric)
|
||||||
|
isBad = true
|
||||||
|
}
|
||||||
|
if top > 0 || isBad {
|
||||||
|
top--
|
||||||
|
prefix := ""
|
||||||
|
if isBad {
|
||||||
|
prefix = "WARNING "
|
||||||
|
}
|
||||||
|
Logf("%vTop latency metric: %+v", prefix, metric)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user