runtime: add filter metrics with specific names

The kata monitor metrics API returns a very large response
when there are many containers or sandboxes, which makes it
hard to focus on the metrics we actually need.

Fixes: #6500

Signed-off-by: Miao Xia <xia.miao1@zte.com.cn>
This commit is contained in:
Miao Xia 2023-03-20 14:53:40 +08:00
parent 96252db787
commit 0f73515561
3 changed files with 46 additions and 16 deletions

View File

@ -52,6 +52,8 @@ The **log-level** allows the chose how verbose the logs should be. The default i
**NOTE: The debug endpoints are available only if the [Kata Containers configuration file](https://github.com/kata-containers/kata-containers/blob/9d5b03a1b70bbd175237ec4b9f821d6ccee0a1f6/src/runtime/config/configuration-qemu.toml.in#L590-L592) includes** `enable_pprof = true` **in the** `[runtime]` **section**.
The `/metrics` endpoint has a query parameter `filter_family`, which filters the Kata sandbox metrics by name. If `filter_family` is set to `A` (or to several comma-separated values such as `A,B`), only the metrics whose names start with `A` (or `B`) are returned.
The `/sandboxes` endpoint lists the _sandbox ID_ of all the detected Kata runtimes. If accessed via a web browser, it provides html links to the endpoints available for each sandbox.
In order to retrieve data for a specific Kata workload, the _sandbox ID_ should be passed in the query string using the _sandbox_ key. The `/agent-url`, and all the `/debug/`* endpoints require `sandbox_id` to be specified in the query string.

View File

@ -114,25 +114,32 @@ func (km *KataMonitor) ProcessMetricsRequest(w http.ResponseWriter, r *http.Requ
writer = gz
}
// create encoder to encode metrics.
encoder := expfmt.NewEncoder(writer, contentType)
// gather metrics collected for management agent.
mfs, err := prometheus.DefaultGatherer.Gather()
filterFamilies, err := getFilterFamilyFromReq(r)
if err != nil {
monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer")
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
return
}
// encode metric gathered in current process
if err := encodeMetricFamily(mfs, encoder); err != nil {
monitorLog.WithError(err).Warnf("failed to encode metrics")
// create encoder to encode metrics.
encoder := expfmt.NewEncoder(writer, contentType)
if len(filterFamilies) == 0 {
// gather metrics collected for management agent.
mfs, err := prometheus.DefaultGatherer.Gather()
if err != nil {
monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer")
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
return
}
// encode metric gathered in current process
if err := encodeMetricFamily(mfs, encoder); err != nil {
monitorLog.WithError(err).Warnf("failed to encode metrics")
}
}
// aggregate sandboxes metrics and write to response by encoder
if err := km.aggregateSandboxMetrics(encoder); err != nil {
if err := km.aggregateSandboxMetrics(encoder, filterFamilies); err != nil {
monitorLog.WithError(err).Errorf("failed aggregateSandboxMetrics")
scrapeFailedCount.Inc()
}
@ -155,7 +162,7 @@ func encodeMetricFamily(mfs []*dto.MetricFamily, encoder expfmt.Encoder) error {
}
// aggregateSandboxMetrics will get metrics from one sandbox and do some process
func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder, filterFamilies []string) error {
// get all kata sandboxes from cache
sandboxes := km.sandboxCache.getSandboxList()
// save running kata pods as a metrics.
@ -230,9 +237,21 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
}
// write metrics to response.
for _, mf := range metricsMap {
if err := encoder.Encode(mf); err != nil {
return err
if len(filterFamilies) > 0 {
for _, filterName := range filterFamilies {
for fullName, mf := range metricsMap {
if strings.HasPrefix(fullName, filterName) {
if err := encoder.Encode(mf); err != nil {
return err
}
}
}
}
} else {
for _, mf := range metricsMap {
if err := encoder.Encode(mf); err != nil {
return err
}
}
}
return nil

View File

@ -8,6 +8,7 @@ package katamonitor
import (
"fmt"
"net/http"
"strings"
"time"
shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
@ -36,3 +37,11 @@ func getSandboxIDFromReq(r *http.Request) (string, error) {
func getSandboxFS() string {
return shim.GetSandboxesStoragePath()
}
// getFilterFamilyFromReq extracts the metric family name prefixes requested
// through the "filter_family" query parameter of r.
//
// The parameter value is a comma-separated list. Each entry is trimmed of
// surrounding whitespace and empty entries are dropped: an empty string is a
// prefix of every name, so keeping it would make a stray comma (for example
// "a,,b" or "a,") match every metric family.
//
// It returns nil when the parameter is absent or contains no usable entry.
// The returned error is currently always nil.
func getFilterFamilyFromReq(r *http.Request) ([]string, error) {
	raw := r.URL.Query().Get("filter_family")
	if raw == "" {
		return nil, nil
	}

	parts := strings.Split(raw, ",")
	families := make([]string, 0, len(parts))
	for _, part := range parts {
		if name := strings.TrimSpace(part); name != "" {
			families = append(families, name)
		}
	}
	if len(families) == 0 {
		return nil, nil
	}
	return families, nil
}