Fix a bug that caused unintentional scale out during a deployment rolling update.

During a rolling update with maxSurge=1 and maxUnavailable=0,
len(metrics) can be greater than currentReplicas,
which may cause an unintentional scale out.
Author: shibataka000 2019-11-09 06:00:58 +00:00
parent e1685b5b59
commit b7122770f8
2 changed files with 25 additions and 1 deletion
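For context, the failure mode reduces to a small piece of arithmetic. The following is a minimal sketch of the rounding step and the new guard, not the real ReplicaCalculator (which also applies a tolerance check and rebalances unready and metric-less pods before this point):

package main

import (
	"fmt"
	"math"
)

// desiredReplicas mirrors the core rounding step of GetResourceReplicas:
// the new replica count is derived from the usage ratio and the number of
// pods that contributed metrics, not from currentReplicas.
func desiredReplicas(newUsageRatio float64, metricCount, currentReplicas int32) int32 {
	newReplicas := int32(math.Ceil(newUsageRatio * float64(metricCount)))
	// The fix: if rounding against len(metrics) would move the replica
	// count in the opposite direction of the usage ratio, hold steady.
	if (newUsageRatio < 1.0 && newReplicas > currentReplicas) ||
		(newUsageRatio > 1.0 && newReplicas < currentReplicas) {
		return currentReplicas
	}
	return newReplicas
}

func main() {
	// Rolling update with maxSurge=1 and maxUnavailable=0: three pods are
	// running while the deployment still wants two replicas, so
	// len(metrics) == 3 while currentReplicas == 2. With a usage ratio of
	// 0.8 (below target, i.e. scale-down territory), the old code computed
	// ceil(0.8 * 3) = 3 and scaled out; the guard now returns 2.
	fmt.Println(desiredReplicas(0.8, 3, 2))
}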


@@ -134,9 +134,15 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
 		return currentReplicas, utilization, rawUtilization, timestamp, nil
 	}
 
+	newReplicas := int32(math.Ceil(newUsageRatio * float64(len(metrics))))
+	if (newUsageRatio < 1.0 && newReplicas > currentReplicas) || (newUsageRatio > 1.0 && newReplicas < currentReplicas) {
+		// return the current replicas if the change of metrics length would cause a change in scale direction
+		return currentReplicas, utilization, rawUtilization, timestamp, nil
+	}
+
 	// return the result, where the number of replicas considered is
 	// however many replicas factored into our calculation
-	return int32(math.Ceil(newUsageRatio * float64(len(metrics)))), utilization, rawUtilization, timestamp, nil
+	return newReplicas, utilization, rawUtilization, timestamp, nil
 }
 
 // GetRawResourceReplicas calculates the desired replica count based on a target resource utilization (as a raw milli-value)
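Note that the guard only suppresses moves that contradict the usage ratio: when the rounded count and newUsageRatio agree on direction, or the count is unchanged, the computed value is returned exactly as before.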


@@ -1247,6 +1247,24 @@ func TestReplicaCalcMissingMetricsUnreadyScaleDown(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcDuringRollingUpdateWithMaxSurge(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  2,
+		expectedReplicas: 2,
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning},
+		resource: &resourceInfo{
+			name:     v1.ResourceCPU,
+			requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:   []int64{100, 100},
+
+			targetUtilization:   50,
+			expectedUtilization: 10,
+			expectedValue:       numContainersPerPod * 100,
+		},
+	}
+	tc.runTest(t)
+}
+
 // TestComputedToleranceAlgImplementation is a regression test which
 // back-calculates a minimal percentage for downscaling based on a small percentage
 // increase in pod utilization which is calibrated against the tolerance value.
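Tracing the test values through the calculator (assuming the usual missing-metrics handling, where a pod without a sample is treated as consuming its full request when the ratio points at a scale down): two of the three running pods report 100m against a 1000m request, so measured utilization is 10% against a 50% target, giving usageRatio = 0.2. Filling in the third pod at its full request yields an average of (100 + 100 + 1000) / 3000 = 40%, i.e. newUsageRatio = 0.8. The old code then computed ceil(0.8 × 3) = 3 and scaled out from 2 to 3 even though utilization was below target; with the guard in place the calculator returns the expected 2.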