From 3c60c1a10ca1a1d89aef895614ea26e8f90aafbb Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 6 Oct 2022 13:40:28 +0200 Subject: [PATCH 1/2] node: e2e: add podresources metrics tests add tests to ensure the podresources metrics are exposed, and basic sanity tests for their values. Signed-off-by: Francesco Romani --- test/e2e_node/podresources_test.go | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go index 8f50548663f..f154c30e3df 100644 --- a/test/e2e_node/podresources_test.go +++ b/test/e2e_node/podresources_test.go @@ -39,7 +39,10 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/onsi/gomega/gstruct" + "github.com/onsi/gomega/types" "k8s.io/kubernetes/test/e2e/framework" + e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" @@ -821,6 +824,49 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P }) }) }) + + ginkgo.Context("when querying /metrics", func() { + ginkgo.BeforeEach(func() { + // ensure APIs have been called at least once + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) + + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() + + _, err = cli.List(context.TODO(), &kubeletpodresourcesv1.ListPodResourcesRequest{}) + framework.ExpectNoError(err) + + _, err = cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) + framework.ExpectNoError(err) + }) + + ginkgo.It("should report the values for the podresources metrics", func() { + // we updated the kubelet config in BeforeEach, so we can assume we start fresh. 
+ // being [Serial], we can also assume no one else but us is running pods. + ginkgo.By("Checking the value of the podresources metrics") + + matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{ + "kubelet_pod_resources_endpoint_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{ + "": timelessSampleAtLeast(1), + }), + "kubelet_pod_resources_endpoint_requests_list": gstruct.MatchAllElements(nodeID, gstruct.Elements{ + "": timelessSampleAtLeast(1), + }), + "kubelet_pod_resources_endpoint_requests_get_allocatable": gstruct.MatchAllElements(nodeID, gstruct.Elements{ + "": timelessSampleAtLeast(1), + }), + // not checking errors: the calls have no error conditions yet other than catastrophic ones (e.g. out of memory). + }) + + ginkgo.By("Giving the Kubelet time to start up and produce metrics") + gomega.Eventually(getPodResourcesMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + ginkgo.By("Ensuring the metrics match the expectations a few more times") + gomega.Consistently(getPodResourcesMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + }) + + }) }) func requireLackOfSRIOVDevices() { @@ -910,3 +956,18 @@ func getKubeVirtDevicePluginPod() *v1.Pod { return p } + +func getPodResourcesMetrics() (e2emetrics.KubeletMetrics, error) { + // we are running out of good names, so we need to be unnecessarily specific to avoid clashes + ginkgo.By("getting Pod Resources metrics from the metrics API") + return e2emetrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName+":10255", "/metrics") +} + +func timelessSampleAtLeast(lower interface{}) types.GomegaMatcher { + return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{ + // We already check Metric when matching the Id + "Metric": gstruct.Ignore(), + "Value": gomega.BeNumerically(">=", lower), + "Timestamp": gstruct.Ignore(), + })) +} From ba6b468982f0c0d879454cac4117777307729976 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 6 Oct 
2022 14:15:07 +0200 Subject: [PATCH 2/2] node: metrics: register podresources metrics Because of a bug in commit 1e7bb20c52e452a7ca061ca1dda1936e9df1f266, the podresources metrics were added and are updated in the right places, but they are never registered, so they are never exported and cannot be consumed. Fix this by simply registering the metrics. Signed-off-by: Francesco Romani --- pkg/kubelet/metrics/metrics.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 17267b888d7..99217eb8103 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -527,6 +527,16 @@ func Register(collectors ...metrics.StableCollector) { legacyregistry.MustRegister(RunningContainerCount) legacyregistry.MustRegister(RunningPodCount) legacyregistry.MustRegister(ManagedEphemeralContainers) + if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPodResources) { + legacyregistry.MustRegister(PodResourcesEndpointRequestsTotalCount) + + if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPodResourcesGetAllocatable) { + legacyregistry.MustRegister(PodResourcesEndpointRequestsListCount) + legacyregistry.MustRegister(PodResourcesEndpointRequestsGetAllocatableCount) + legacyregistry.MustRegister(PodResourcesEndpointErrorsListCount) + legacyregistry.MustRegister(PodResourcesEndpointErrorsGetAllocatableCount) + } + } legacyregistry.MustRegister(StartedPodsTotal) legacyregistry.MustRegister(StartedPodsErrorsTotal) legacyregistry.MustRegister(StartedContainersTotal) @@ -537,6 +547,7 @@ func Register(collectors ...metrics.StableCollector) { } legacyregistry.MustRegister(RunPodSandboxDuration) legacyregistry.MustRegister(RunPodSandboxErrors) + for _, collector := range collectors { legacyregistry.CustomMustRegister(collector) }