diff --git a/src/runtime/cmd/kata-monitor/README.md b/src/runtime/cmd/kata-monitor/README.md index 5ebbc8cdac..f6fcec1d3d 100644 --- a/src/runtime/cmd/kata-monitor/README.md +++ b/src/runtime/cmd/kata-monitor/README.md @@ -52,6 +52,8 @@ The **log-level** allows the chose how verbose the logs should be. The default i **NOTE: The debug endpoints are available only if the [Kata Containers configuration file](https://github.com/kata-containers/kata-containers/blob/9d5b03a1b70bbd175237ec4b9f821d6ccee0a1f6/src/runtime/config/configuration-qemu.toml.in#L590-L592) includes** `enable_pprof = true` **in the** `[runtime]` **section**. +The `/metrics` endpoint accepts an optional query parameter `filter_family`, which filters the Kata sandbox metrics by name. Its value is a comma-separated list of prefixes: if `filter_family` is set to `A` (or to `A,B`), only sandbox metrics whose names start with `A` (or with `A` or `B`) are returned. When `filter_family` is set, the metrics of the kata-monitor process itself are not included in the response. + The `/sandboxes` endpoint lists the _sandbox ID_ of all the detected Kata runtimes. If accessed via a web browser, it provides html links to the endpoints available for each sandbox. In order to retrieve data for a specific Kata workload, the _sandbox ID_ should be passed in the query string using the _sandbox_ key. The `/agent-url`, and all the `/debug/`* endpoints require `sandbox_id` to be specified in the query string. diff --git a/src/runtime/pkg/kata-monitor/metrics.go b/src/runtime/pkg/kata-monitor/metrics.go index 98ecb68f09..e45a8f19dc 100644 --- a/src/runtime/pkg/kata-monitor/metrics.go +++ b/src/runtime/pkg/kata-monitor/metrics.go @@ -114,25 +114,32 @@ func (km *KataMonitor) ProcessMetricsRequest(w http.ResponseWriter, r *http.Requ writer = gz } - // create encoder to encode metrics. - encoder := expfmt.NewEncoder(writer, contentType) - - // gather metrics collected for management agent. 
- mfs, err := prometheus.DefaultGatherer.Gather() + filterFamilies, err := getFilterFamilyFromReq(r) if err != nil { - monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer") - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) return } - // encode metric gathered in current process - if err := encodeMetricFamily(mfs, encoder); err != nil { - monitorLog.WithError(err).Warnf("failed to encode metrics") + // create encoder to encode metrics. + encoder := expfmt.NewEncoder(writer, contentType) + + if len(filterFamilies) == 0 { + // gather metrics collected for management agent. + mfs, err := prometheus.DefaultGatherer.Gather() + if err != nil { + monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(err.Error())) + return + } + + // encode metric gathered in current process + if err := encodeMetricFamily(mfs, encoder); err != nil { + monitorLog.WithError(err).Warnf("failed to encode metrics") + } } // aggregate sandboxes metrics and write to response by encoder - if err := km.aggregateSandboxMetrics(encoder); err != nil { + if err := km.aggregateSandboxMetrics(encoder, filterFamilies); err != nil { monitorLog.WithError(err).Errorf("failed aggregateSandboxMetrics") scrapeFailedCount.Inc() } @@ -155,7 +162,7 @@ func encodeMetricFamily(mfs []*dto.MetricFamily, encoder expfmt.Encoder) error { } // aggregateSandboxMetrics will get metrics from one sandbox and do some process -func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { +func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder, filterFamilies []string) error { // get all kata sandboxes from cache sandboxes := km.sandboxCache.getSandboxList() // save running kata pods as a metrics. @@ -230,9 +237,21 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { } // write metrics to response. 
- for _, mf := range metricsMap { - if err := encoder.Encode(mf); err != nil { - return err + if len(filterFamilies) > 0 { + for _, filterName := range filterFamilies { + for fullName, mf := range metricsMap { + if strings.HasPrefix(fullName, filterName) { + if err := encoder.Encode(mf); err != nil { + return err + } + } + } + } + } else { + for _, mf := range metricsMap { + if err := encoder.Encode(mf); err != nil { + return err + } } } return nil diff --git a/src/runtime/pkg/kata-monitor/shim_client.go b/src/runtime/pkg/kata-monitor/shim_client.go index 388ac6fff5..3730c8af0a 100644 --- a/src/runtime/pkg/kata-monitor/shim_client.go +++ b/src/runtime/pkg/kata-monitor/shim_client.go @@ -8,6 +8,7 @@ package katamonitor import ( "fmt" "net/http" + "strings" "time" shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2" @@ -36,3 +37,11 @@ func getSandboxIDFromReq(r *http.Request) (string, error) { func getSandboxFS() string { return shim.GetSandboxesStoragePath() } + +func getFilterFamilyFromReq(r *http.Request) ([]string, error) { + filterFamilies := r.URL.Query().Get("filter_family") + if filterFamilies != "" { + return strings.Split(filterFamilies, ","), nil + } + return nil, nil +}