From ab447285badb68ecda94afe4a306fd188d292e0b Mon Sep 17 00:00:00 2001 From: Francesco Giudici Date: Tue, 25 Jan 2022 13:42:39 +0100 Subject: [PATCH] kata-monitor: add kubernetes pod metadata labels to metrics Add the POD metadata we get from the container manager to the metrics by adding more labels. Fixes: #3551 Signed-off-by: Francesco Giudici --- src/runtime/pkg/kata-monitor/metrics.go | 38 ++++++++++++++----- src/runtime/pkg/kata-monitor/metrics_test.go | 12 +++++- src/runtime/pkg/kata-monitor/sandbox_cache.go | 8 ++++ 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/runtime/pkg/kata-monitor/metrics.go b/src/runtime/pkg/kata-monitor/metrics.go index d2958441b9..0216969cb1 100644 --- a/src/runtime/pkg/kata-monitor/metrics.go +++ b/src/runtime/pkg/kata-monitor/metrics.go @@ -160,9 +160,13 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { // get metrics from sandbox's shim for _, sandboxID := range sandboxes { + sandboxMetadata, ok := km.sandboxCache.getMetadata(sandboxID) + if !ok { // likely the sandbox has been just removed + continue + } wg.Add(1) - go func(sandboxID string, results chan<- []*dto.MetricFamily) { - sandboxMetrics, err := getParsedMetrics(sandboxID) + go func(sandboxID string, sandboxMetadata sandboxKubeData, results chan<- []*dto.MetricFamily) { + sandboxMetrics, err := getParsedMetrics(sandboxID, sandboxMetadata) if err != nil { monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox") } @@ -170,7 +174,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { results <- sandboxMetrics wg.Done() monitorLog.WithField("sandbox_id", sandboxID).Debug("job finished") - }(sandboxID, results) + }(sandboxID, sandboxMetadata, results) monitorLog.WithField("sandbox_id", sandboxID).Debug("job started") } @@ -219,13 +223,13 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { } -func getParsedMetrics(sandboxID string) ([]*dto.MetricFamily, error) { +func getParsedMetrics(sandboxID string, sandboxMetadata sandboxKubeData) ([]*dto.MetricFamily, error) { body, err := doGet(sandboxID, defaultTimeout, "metrics") if err != nil { return nil, err } - return parsePrometheusMetrics(sandboxID, body) + return parsePrometheusMetrics(sandboxID, sandboxMetadata, body) } // GetSandboxMetrics will get sandbox's metrics from shim @@ -240,7 +244,7 @@ func GetSandboxMetrics(sandboxID string) (string, error) { // parsePrometheusMetrics will decode metrics from Prometheus text format // and return array of *dto.MetricFamily with an ASC order -func parsePrometheusMetrics(sandboxID string, body []byte) ([]*dto.MetricFamily, error) { +func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, body []byte) ([]*dto.MetricFamily, error) { reader := bytes.NewReader(body) decoder := expfmt.NewDecoder(reader, expfmt.FmtText) @@ -258,10 +262,24 @@ func parsePrometheusMetrics(sandboxID string, body []byte) ([]*dto.MetricFamily, metricList := mf.Metric for j := range metricList { metric := metricList[j] - metric.Label = append(metric.Label, &dto.LabelPair{ - Name: mutils.String2Pointer("sandbox_id"), - Value: mutils.String2Pointer(sandboxID), - }) + metric.Label = append(metric.Label, + &dto.LabelPair{ + Name: mutils.String2Pointer("sandbox_id"), + Value: mutils.String2Pointer(sandboxID), + }, + &dto.LabelPair{ + Name: mutils.String2Pointer("kube_uid"), + Value: mutils.String2Pointer(sandboxMetadata.uid), + }, + &dto.LabelPair{ + Name: mutils.String2Pointer("kube_name"), + Value: mutils.String2Pointer(sandboxMetadata.name), + }, + &dto.LabelPair{ + Name: mutils.String2Pointer("kube_namespace"), + Value: mutils.String2Pointer(sandboxMetadata.namespace), + }, + ) } // Kata shim are using prometheus go client, add a prefix for metric name to avoid confusing diff --git a/src/runtime/pkg/kata-monitor/metrics_test.go b/src/runtime/pkg/kata-monitor/metrics_test.go index 5263d2a932..1055a6d361 100644 --- a/src/runtime/pkg/kata-monitor/metrics_test.go +++ b/src/runtime/pkg/kata-monitor/metrics_test.go @@ -40,9 +40,10 @@ ttt 999 func TestParsePrometheusMetrics(t *testing.T) { assert := assert.New(t) sandboxID := "sandboxID-abc" + sandboxMetadata := sandboxKubeData{"123", "pod-name", "pod-namespace"} // parse metrics - list, err := parsePrometheusMetrics(sandboxID, []byte(shimMetricBody)) + list, err := parsePrometheusMetrics(sandboxID, sandboxMetadata, []byte(shimMetricBody)) assert.Nil(err, "parsePrometheusMetrics should not return error") assert.Equal(4, len(list), "should return 3 metric families") @@ -56,9 +57,16 @@ func TestParsePrometheusMetrics(t *testing.T) { // get the metric m := mf.Metric[0] - assert.Equal(1, len(m.Label), "should have only 1 labels") + assert.Equal(4, len(m.Label), "should have 4 labels") assert.Equal("sandbox_id", *m.Label[0].Name, "label name should be sandbox_id") assert.Equal(sandboxID, *m.Label[0].Value, "label value should be", sandboxID) + assert.Equal("kube_uid", *m.Label[1].Name, "label name should be kube_uid") + assert.Equal(sandboxMetadata.uid, *m.Label[1].Value, "label value should be", sandboxMetadata.uid) + + assert.Equal("kube_name", *m.Label[2].Name, "label name should be kube_name") + assert.Equal(sandboxMetadata.name, *m.Label[2].Value, "label value should be", sandboxMetadata.name) + assert.Equal("kube_namespace", *m.Label[3].Name, "label name should be kube_namespace") + assert.Equal(sandboxMetadata.namespace, *m.Label[3].Value, "label value should be", sandboxMetadata.namespace) summary := m.Summary assert.NotNil(summary, "summary should not be nil") diff --git a/src/runtime/pkg/kata-monitor/sandbox_cache.go b/src/runtime/pkg/kata-monitor/sandbox_cache.go index 4e3e778455..ba98a121f2 100644 --- a/src/runtime/pkg/kata-monitor/sandbox_cache.go +++ b/src/runtime/pkg/kata-monitor/sandbox_cache.go @@ -62,3 +62,11 @@ func (sc *sandboxCache) setMetadata(id string, value sandboxKubeData) { sc.sandboxes[id] = value } + +func (sc *sandboxCache) getMetadata(id string) (sandboxKubeData, bool) { + sc.Lock() + defer sc.Unlock() + + metadata, ok := sc.sandboxes[id] + return metadata, ok +}