Merge pull request #127193 from DP19/ignore-unready-pods-hpa-containermetrics

allow ContainerResource calculations to continue with missing metrics like Resource calculations
This commit is contained in:
Kubernetes Prow Robot 2024-12-13 15:18:25 +01:00 committed by GitHub
commit 77749c21f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 41 additions and 22 deletions

View File

@ -75,10 +75,7 @@ func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource
} }
var res PodMetricsInfo var res PodMetricsInfo
if container != "" { if container != "" {
res, err = getContainerMetrics(metrics.Items, resource, container) res = getContainerMetrics(ctx, metrics.Items, resource, container)
if err != nil {
return nil, time.Time{}, fmt.Errorf("failed to get container metrics: %v", err)
}
} else { } else {
res = getPodMetrics(ctx, metrics.Items, resource) res = getPodMetrics(ctx, metrics.Items, resource)
} }
@ -86,7 +83,7 @@ func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource
return res, timestamp, nil return res, timestamp, nil
} }
func getContainerMetrics(rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName, container string) (PodMetricsInfo, error) { func getContainerMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName, container string) PodMetricsInfo {
res := make(PodMetricsInfo, len(rawMetrics)) res := make(PodMetricsInfo, len(rawMetrics))
for _, m := range rawMetrics { for _, m := range rawMetrics {
containerFound := false containerFound := false
@ -104,10 +101,10 @@ func getContainerMetrics(rawMetrics []metricsapi.PodMetrics, resource v1.Resourc
} }
} }
if !containerFound { if !containerFound {
return nil, fmt.Errorf("container %s not present in metrics for pod %s/%s", container, m.Namespace, m.Name) klog.FromContext(ctx).V(2).Info("Missing container metric", "container", container, "pod", klog.KRef(m.Namespace, m.Name))
} }
} }
return res, nil return res
} }
func getPodMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName) PodMetricsInfo { func getPodMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName) PodMetricsInfo {

View File

@ -429,7 +429,22 @@ func TestRESTClientContainerCPUEmptyMetricsForOnePod(t *testing.T) {
container: "test-1", container: "test-1",
targetTimestamp: targetTimestamp, targetTimestamp: targetTimestamp,
window: window, window: window,
desiredError: fmt.Errorf("failed to get container metrics"), reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}},
}
tc.runTest(t)
}
func TestRESTClientContainerCPUEmptyMetricsForOnePodReturnsOnlyFoundContainer(t *testing.T) {
targetTimestamp := 1
window := 30 * time.Second
tc := restClientTestCase{
resourceName: v1.ResourceCPU,
desiredMetricValues: PodMetricsInfo{
"test-pod-1": {Value: 400, Timestamp: offsetTimestampBy(targetTimestamp), Window: window},
},
container: "test-2",
targetTimestamp: targetTimestamp,
window: window,
reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}}, reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}},
} }
tc.runTest(t) tc.runTest(t)

View File

@ -41,6 +41,7 @@ type MetricsClient interface {
// GetResourceMetric gets the given resource metric (and an associated oldest timestamp) // GetResourceMetric gets the given resource metric (and an associated oldest timestamp)
// for the specified named container in all pods matching the specified selector in the given namespace and when // for the specified named container in all pods matching the specified selector in the given namespace and when
// the container is an empty string it returns the sum of all the container metrics. // the container is an empty string it returns the sum of all the container metrics.
// Missing metrics will not error and callers should rely on Pod status to filter metrics info returned from this interface.
GetResourceMetric(ctx context.Context, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (PodMetricsInfo, time.Time, error) GetResourceMetric(ctx context.Context, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (PodMetricsInfo, time.Time, error)
// GetRawMetric gets the given metric (and an associated oldest timestamp) // GetRawMetric gets the given metric (and an associated oldest timestamp)

View File

@ -446,20 +446,6 @@ func TestReplicaCalcDisjointResourcesMetrics(t *testing.T) {
tc.runTest(t) tc.runTest(t)
} }
func TestReplicaCalcMissingContainerMetricError(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 1,
expectedError: fmt.Errorf("container container2 not present in metrics for pod test-namespace/test-pod-0"),
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0")},
levels: [][]int64{{0}},
},
container: "container2",
}
tc.runTest(t)
}
func TestReplicaCalcScaleUp(t *testing.T) { func TestReplicaCalcScaleUp(t *testing.T) {
tc := replicaCalcTestCase{ tc := replicaCalcTestCase{
currentReplicas: 3, currentReplicas: 3,
@ -606,6 +592,26 @@ func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
tc.runTest(t) tc.runTest(t)
} }
func TestReplicaCalcMissingContainerMetricScaleUpIgnoresFailedPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 2,
expectedReplicas: 4,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: [][]int64{{1000, 500}, {9000, 700}, {1000}, {9000}},
targetUtilization: 30,
expectedUtilization: 60,
expectedValue: 600,
},
container: "container2",
}
tc.runTest(t)
}
func TestReplicaCalcScaleUpContainerIgnoresFailedPods(t *testing.T) { func TestReplicaCalcScaleUpContainerIgnoresFailedPods(t *testing.T) {
tc := replicaCalcTestCase{ tc := replicaCalcTestCase{
currentReplicas: 2, currentReplicas: 2,