From f97abdbee07d47b474ecb38049b879b2b91e0cc0 Mon Sep 17 00:00:00 2001 From: David Pait Date: Fri, 6 Sep 2024 08:16:49 -0400 Subject: [PATCH] allow ContainerResource calculations to continue with missing metrics like Resource calculations --- .../podautoscaler/metrics/client.go | 11 +++--- .../podautoscaler/metrics/client_test.go | 17 +++++++++- .../podautoscaler/metrics/interfaces.go | 1 + .../podautoscaler/replica_calculator_test.go | 34 +++++++++++-------- 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/pkg/controller/podautoscaler/metrics/client.go b/pkg/controller/podautoscaler/metrics/client.go index 03ecc22c443..71f2cc616bb 100644 --- a/pkg/controller/podautoscaler/metrics/client.go +++ b/pkg/controller/podautoscaler/metrics/client.go @@ -75,10 +75,7 @@ func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource } var res PodMetricsInfo if container != "" { - res, err = getContainerMetrics(metrics.Items, resource, container) - if err != nil { - return nil, time.Time{}, fmt.Errorf("failed to get container metrics: %v", err) - } + res = getContainerMetrics(ctx, metrics.Items, resource, container) } else { res = getPodMetrics(ctx, metrics.Items, resource) } @@ -86,7 +83,7 @@ func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource return res, timestamp, nil } -func getContainerMetrics(rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName, container string) (PodMetricsInfo, error) { +func getContainerMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName, container string) PodMetricsInfo { res := make(PodMetricsInfo, len(rawMetrics)) for _, m := range rawMetrics { containerFound := false @@ -104,10 +101,10 @@ func getContainerMetrics(rawMetrics []metricsapi.PodMetrics, resource v1.Resourc } } if !containerFound { - return nil, fmt.Errorf("container %s not present in metrics for pod %s/%s", container, m.Namespace, m.Name) + klog.FromContext(ctx).V(2).Info("Missing container metric", "container", container, "pod", klog.KRef(m.Namespace, m.Name)) } } - return res, nil + return res } func getPodMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName) PodMetricsInfo { diff --git a/pkg/controller/podautoscaler/metrics/client_test.go b/pkg/controller/podautoscaler/metrics/client_test.go index 04418c6c0f7..59fbcc804aa 100644 --- a/pkg/controller/podautoscaler/metrics/client_test.go +++ b/pkg/controller/podautoscaler/metrics/client_test.go @@ -429,7 +429,22 @@ func TestRESTClientContainerCPUEmptyMetricsForOnePod(t *testing.T) { container: "test-1", targetTimestamp: targetTimestamp, window: window, - desiredError: fmt.Errorf("failed to get container metrics"), + reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}}, + } + tc.runTest(t) +} + +func TestRESTClientContainerCPUEmptyMetricsForOnePodReturnsOnlyFoundContainer(t *testing.T) { + targetTimestamp := 1 + window := 30 * time.Second + tc := restClientTestCase{ + resourceName: v1.ResourceCPU, + desiredMetricValues: PodMetricsInfo{ + "test-pod-1": {Value: 400, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + }, + container: "test-2", + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}}, } tc.runTest(t) diff --git a/pkg/controller/podautoscaler/metrics/interfaces.go b/pkg/controller/podautoscaler/metrics/interfaces.go index 6b9b9f1aebf..bd1b6d70578 100644 --- a/pkg/controller/podautoscaler/metrics/interfaces.go +++ b/pkg/controller/podautoscaler/metrics/interfaces.go @@ -41,6 +41,7 @@ type MetricsClient interface { // GetResourceMetric gets the given resource metric (and an associated oldest timestamp) // for the specified named container in all pods matching the specified selector in the given namespace and when // the container is an empty string it returns the sum of all the container metrics. + // Missing metrics will not error and callers should rely on Pod status to filter metrics info returned from this interface. GetResourceMetric(ctx context.Context, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (PodMetricsInfo, time.Time, error) // GetRawMetric gets the given metric (and an associated oldest timestamp) diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 6a8653378b5..c90871bd4e8 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -446,20 +446,6 @@ func TestReplicaCalcDisjointResourcesMetrics(t *testing.T) { tc.runTest(t) } -func TestReplicaCalcMissingContainerMetricError(t *testing.T) { - tc := replicaCalcTestCase{ - currentReplicas: 1, - expectedError: fmt.Errorf("container container2 not present in metrics for pod test-namespace/test-pod-0"), - resource: &resourceInfo{ - name: v1.ResourceCPU, - requests: []resource.Quantity{resource.MustParse("1.0")}, - levels: [][]int64{{0}}, - }, - container: "container2", - } - tc.runTest(t) -} - func TestReplicaCalcScaleUp(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, @@ -606,6 +592,26 @@ func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcMissingContainerMetricScaleUpIgnoresFailedPods(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 2, + expectedReplicas: 4, + podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, + podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: [][]int64{{1000, 500}, {9000, 700}, {1000}, {9000}}, + + targetUtilization: 30, + expectedUtilization: 60, + expectedValue: 600, + }, + container: "container2", + } + tc.runTest(t) +} + func TestReplicaCalcScaleUpContainerIgnoresFailedPods(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 2,