Merge pull request #29505 from kargakis/debug-recreate-flake

Automatic merge from submit-queue

controller: enhance timeout error message for Recreate deployments

Makes the error message from https://github.com/kubernetes/kubernetes/issues/29197 more obvious

@kubernetes/deployment
Merged by Kubernetes Submit Queue on 2016-09-21 01:45:47 -07:00 (committed by GitHub)
commit 02605106a6
3 changed files with 29 additions and 27 deletions
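
The visible payload of the PR is the new timeout branch in waitForInactiveReplicaSets below: instead of surfacing the bare wait.ErrWaitTimeout ("timed out waiting for the condition"), the controller now reports which replica set never went inactive and the last state it observed. A minimal sketch of the resulting message, using made-up values (the format string is the one from the diff; the replica set name and counts are hypothetical):

package main

import "fmt"

func main() {
	// Hypothetical last-observed state at the moment the backoff gave up.
	name := "nginx-3137573019"
	synced := false // i.e. observedGeneration >= desiredGeneration was still false
	specReplicas, statusReplicas := int32(0), int32(2)

	err := fmt.Errorf("replica set %q never became inactive: synced=%t, spec.replicas=%d, status.replicas=%d",
		name, synced, specReplicas, statusReplicas)
	fmt.Println(err)
	// replica set "nginx-3137573019" never became inactive: synced=false, spec.replicas=0, status.replicas=2
}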


@@ -174,6 +174,7 @@ pkg/util/json
 pkg/util/limitwriter
 pkg/util/logs
 pkg/util/maps
+pkg/util/replicaset
 pkg/util/validation/field
 pkg/util/workqueue
 pkg/version/prometheus


@@ -17,10 +17,11 @@ limitations under the License.
 package deployment
 
 import (
+	"fmt"
+
 	"k8s.io/kubernetes/pkg/apis/extensions"
 	unversionedclient "k8s.io/kubernetes/pkg/client/unversioned"
 	"k8s.io/kubernetes/pkg/controller"
-	rsutil "k8s.io/kubernetes/pkg/util/replicaset"
 	"k8s.io/kubernetes/pkg/util/wait"
 )
@@ -78,16 +79,18 @@ func (dc *DeploymentController) rolloutRecreate(deployment *extensions.Deploymen
 // scaleDownOldReplicaSetsForRecreate scales down old replica sets when deployment strategy is "Recreate"
 func (dc *DeploymentController) scaleDownOldReplicaSetsForRecreate(oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment) (bool, error) {
 	scaled := false
-	for _, rs := range oldRSs {
+	for i := range oldRSs {
+		rs := oldRSs[i]
 		// Scaling not required.
 		if rs.Spec.Replicas == 0 {
 			continue
 		}
-		scaledRS, _, err := dc.scaleReplicaSetAndRecordEvent(rs, 0, deployment)
+		scaledRS, updatedRS, err := dc.scaleReplicaSetAndRecordEvent(rs, 0, deployment)
 		if err != nil {
 			return false, err
 		}
 		if scaledRS {
+			oldRSs[i] = updatedRS
 			scaled = true
 		}
 	}
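
A side fix in the hunk above: the loop is switched from for-range to index form so that the updated object returned by scaleReplicaSetAndRecordEvent can be written back into oldRSs; the subsequent wait in waitForInactiveReplicaSets then starts from the post-scale generation instead of a stale copy. A minimal sketch of that write-back pattern, using a stand-in struct rather than the real extensions.ReplicaSet and a hypothetical scaleToZero in place of scaleReplicaSetAndRecordEvent:

package main

import "fmt"

// replicaSet is a stand-in keeping only the fields the diff cares about.
type replicaSet struct {
	Name       string
	Generation int64
	Replicas   int32
}

// scaleToZero mimics a scale call: the server bumps the object's generation,
// and the updated object is returned alongside a "did we scale?" flag.
func scaleToZero(rs *replicaSet) (bool, *replicaSet) {
	if rs.Replicas == 0 {
		return false, rs
	}
	updated := *rs
	updated.Replicas = 0
	updated.Generation++
	return true, &updated
}

func main() {
	oldRSs := []*replicaSet{{Name: "rs-1", Generation: 3, Replicas: 2}}
	for i := range oldRSs {
		if scaled, updated := scaleToZero(oldRSs[i]); scaled {
			// Write back, so later consumers of oldRSs see the new generation.
			oldRSs[i] = updated
		}
	}
	fmt.Println(oldRSs[0].Generation) // 4 — without the write-back it would still read 3
}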
@@ -99,9 +102,29 @@ func (dc *DeploymentController) scaleDownOldReplicaSetsForRecreate(oldRSs []*ext
 func (dc *DeploymentController) waitForInactiveReplicaSets(oldRSs []*extensions.ReplicaSet) error {
 	for i := range oldRSs {
 		rs := oldRSs[i]
+		desiredGeneration := rs.Generation
+		observedGeneration := rs.Status.ObservedGeneration
+		specReplicas := rs.Spec.Replicas
+		statusReplicas := rs.Status.Replicas
-		condition := rsutil.ReplicaSetIsInactive(dc.client.Extensions(), rs)
-		if err := wait.ExponentialBackoff(unversionedclient.DefaultRetry, condition); err != nil {
+		if err := wait.ExponentialBackoff(unversionedclient.DefaultRetry, func() (bool, error) {
+			replicaSet, err := dc.rsStore.ReplicaSets(rs.Namespace).Get(rs.Name)
+			if err != nil {
+				return false, err
+			}
+			specReplicas = replicaSet.Spec.Replicas
+			statusReplicas = replicaSet.Status.Replicas
+			observedGeneration = replicaSet.Status.ObservedGeneration
+			// TODO: We also need to wait for terminating replicas to actually terminate.
+			// See https://github.com/kubernetes/kubernetes/issues/32567
+			return observedGeneration >= desiredGeneration && replicaSet.Spec.Replicas == 0 && replicaSet.Status.Replicas == 0, nil
+		}); err != nil {
+			if err == wait.ErrWaitTimeout {
+				err = fmt.Errorf("replica set %q never became inactive: synced=%t, spec.replicas=%d, status.replicas=%d",
+					rs.Name, observedGeneration >= desiredGeneration, specReplicas, statusReplicas)
+			}
 			return err
 		}
 	}
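
The deleted rsutil.ReplicaSetIsInactive helper is inlined above so the condition closure can copy the last-seen spec.replicas, status.replicas, and observedGeneration into variables that outlive the wait; when wait.ExponentialBackoff gives up, those captured values turn an opaque timeout into a diagnosable error. A self-contained sketch of the same capture-for-diagnostics pattern, with a toy pollUntil standing in for wait.ExponentialBackoff:

package main

import (
	"errors"
	"fmt"
	"time"
)

// errWaitTimeout mirrors wait.ErrWaitTimeout.
var errWaitTimeout = errors.New("timed out waiting for the condition")

// pollUntil is a toy stand-in for wait.ExponentialBackoff: retry the
// condition a fixed number of times, doubling the delay between attempts.
func pollUntil(steps int, delay time.Duration, condition func() (bool, error)) error {
	for i := 0; i < steps; i++ {
		if ok, err := condition(); ok || err != nil {
			return err
		}
		time.Sleep(delay)
		delay *= 2
	}
	return errWaitTimeout
}

func main() {
	// The outer variable records the last state the condition observed, so a
	// timeout can report why the wait failed rather than just that it failed.
	statusReplicas := int32(2)
	err := pollUntil(3, time.Millisecond, func() (bool, error) {
		// In the controller this re-reads the ReplicaSet from dc.rsStore.
		return statusReplicas == 0, nil
	})
	if err == errWaitTimeout {
		err = fmt.Errorf("never became inactive: status.replicas=%d", statusReplicas)
	}
	fmt.Println(err) // never became inactive: status.replicas=2
}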


@@ -108,25 +108,3 @@ func MatchingPodsFunc(rs *extensions.ReplicaSet) (func(api.Pod) bool, error) {
 		return selector.Matches(podLabelsSelector)
 	}, nil
 }
-
-// ReplicaSetIsInactive returns a condition that will be true when a replica set is inactive ie.
-// it has zero running replicas.
-func ReplicaSetIsInactive(c unversionedextensions.ExtensionsInterface, replicaSet *extensions.ReplicaSet) wait.ConditionFunc {
-	// If we're given a ReplicaSet where the status lags the spec, it either means that the
-	// ReplicaSet is stale, or that the ReplicaSet manager hasn't noticed the update yet.
-	// Polling status.Replicas is not safe in the latter case.
-	desiredGeneration := replicaSet.Generation
-	return func() (bool, error) {
-		rs, err := c.ReplicaSets(replicaSet.Namespace).Get(replicaSet.Name)
-		if err != nil {
-			return false, err
-		}
-		return rs.Status.ObservedGeneration >= desiredGeneration &&
-			rs.Spec.Replicas == 0 &&
-			rs.Status.Replicas == 0 &&
-			rs.Status.FullyLabeledReplicas == 0, nil
-	}
-}
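
The doc comment on the deleted helper states the invariant that the inlined condition preserves: replica counts in status are only trustworthy once status.observedGeneration has caught up with metadata.generation, because a lagging controller may still be reporting counts for an older spec. A minimal illustration of that guard, with a stand-in struct for the two fields involved (on the real API object they live on extensions.ReplicaSet):

package main

import "fmt"

// rsView is a stand-in carrying just the two fields the guard compares.
type rsView struct {
	Generation         int64 // bumped by the API server on every spec update
	ObservedGeneration int64 // written by the controller after it syncs that spec
}

// statusIsFresh reports whether status describes the current spec. If the
// controller lags, the replica counts in status may belong to an older spec,
// so callers must not act on them yet.
func statusIsFresh(rs rsView) bool {
	return rs.ObservedGeneration >= rs.Generation
}

func main() {
	fmt.Println(statusIsFresh(rsView{Generation: 4, ObservedGeneration: 3})) // false: status is stale
	fmt.Println(statusIsFresh(rsView{Generation: 4, ObservedGeneration: 4})) // true: safe to read counts
}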