Merge pull request #36439 from kargakis/update-rollover-test

Automatic merge from submit-queue

test: update rollover test to wait for available rs before adopting

Scenario that happened in https://github.com/kubernetes/kubernetes/issues/35355#issuecomment-257808460

-- The replica set that is about to be adopted has 2 out of 4 replicas ready.
-- A Deployment is created with 4 replicas; it adopts the pre-existing replica set, creates a new one, and starts rolling replicas over to the new replica set.
```
Nov  2 01:38:17.088: INFO: At 2016-11-02 01:38:04 -0700 PDT - event for test-rollover-deployment: {deployment-controller } ScalingReplicaSet: Scaled down replica set test-rollover-controller to 3
Nov  2 01:38:17.088: INFO: At 2016-11-02 01:38:04 -0700 PDT - event for test-rollover-deployment: {deployment-controller } ScalingReplicaSet: Scaled up replica set test-rollover-deployment-2505289747 to 1
Nov  2 01:38:17.088: INFO: At 2016-11-02 01:38:04 -0700 PDT - event for test-rollover-deployment-2505289747: {replicaset-controller } SuccessfulCreate: Created pod: test-rollover-deployment-2505289747-iuiei
Nov  2 01:38:17.088: INFO: At 2016-11-02 01:38:04 -0700 PDT - event for test-rollover-deployment-2505289747-iuiei: {default-scheduler } Scheduled: Successfully assigned test-rollover-deployment-2505289747-iuiei to gke-jenkins-e2e-default-pool-33c0400e-6q5m
Nov  2 01:38:17.088: INFO: At 2016-11-02 01:38:05 -0700 PDT - event for test-rollover-deployment: {deployment-controller } ScalingReplicaSet: Scaled up replica set test-rollover-deployment-2505289747 to 2
```
At this point the Deployment does not meet its minimum availability (maxUnavailable is 1, so the desired minimum available is 3, but only 2 replicas are available), and the new replica set uses a non-existent image. The new replica set is scaled up to 1 (maxSurge is 1), then the old replica set is scaled down by one, because cleanupUnhealthyReplicas observes that it has 2 unhealthy replicas; it can only scale down one of them, though, because the [maximum number of replicas it can clean up is one](d87dfa2723/pkg/controller/deployment/rolling.go (L125)) (4+1-3-1; see the sketch below). The new replica set is then scaled to 2. Available replicas are still 2 (the third replica from the old replica set has yet to come up).
-- The Deployment is rolled over with a new update. The test reaches the WaitForDeploymentStatus check, but there are only 2 availableReplicas (maxUnavailable is still violated).
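
The cleanup arithmetic above can be restated as a minimal sketch. The variable names below are illustrative, not the exact identifiers in pkg/controller/deployment/rolling.go, but the numbers match the scenario:

```go
package main

import "fmt"

func main() {
	// Scenario above: a 4-replica Deployment adopts an old replica set with 4 pods
	// (only 2 of them ready) and creates a new replica set whose single pod can
	// never become ready (non-existent image).
	oldPods := int32(4)        // pods owned by the adopted (old) replica set
	newPods := int32(1)        // new replica set after the first scale-up (maxSurge is 1)
	desired := int32(4)        // the Deployment's .spec.replicas
	maxUnavailable := int32(1) // from the rolling update strategy
	newUnavailable := int32(1) // the new pod uses a non-existent image

	allPods := oldPods + newPods             // 4 + 1 = 5
	minAvailable := desired - maxUnavailable // 4 - 1 = 3
	// Maximum number of unhealthy old replicas that may be cleaned up right now:
	maxCleanup := allPods - minAvailable - newUnavailable // 5 - 3 - 1 = 1
	fmt.Println(maxCleanup) // 1, so only one old replica is scaled down
}
```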

This change makes the test wait for a fully available replica set before adopting it, so it should never hit the scenario described above.
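
"Fully available" here means the condition checked by the new WaitForAvailableReplicaSet helper in the diff below; restated as a small sketch (assuming the extensions API group import used by the e2e framework at the time):

```go
// rsFullyAvailable mirrors the check added in this PR: the replica set controller
// has observed the latest spec, and every desired replica is both created and available.
// Assumes: import extensions "k8s.io/kubernetes/pkg/apis/extensions" (as in the e2e framework).
func rsFullyAvailable(rs *extensions.ReplicaSet) bool {
	return rs.Status.ObservedGeneration >= rs.Generation &&
		*(rs.Spec.Replicas) == rs.Status.Replicas &&
		*(rs.Spec.Replicas) == rs.Status.AvailableReplicas
}
```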

@kubernetes/deployment
Kubernetes Submit Queue 2016-12-02 12:48:44 -08:00 committed by GitHub
commit d71154b910
2 changed files with 31 additions and 8 deletions

View File

@@ -550,6 +550,9 @@ func testRolloverDeployment(f *framework.Framework) {
 		framework.Logf("error in waiting for pods to come up: %s", err)
 		Expect(err).NotTo(HaveOccurred())
 	}
+	// Wait for replica set to become fully available before adopting it.
+	framework.Logf("Waiting for replica set %q to become fully available", rsName)
+	Expect(framework.WaitForAvailableReplicaSet(c, ns, rsName)).NotTo(HaveOccurred())
 	// Create a deployment to delete nginx pods and instead bring up redis-slave pods.
 	// We use a nonexistent image here, so that we make sure it won't finish

View File

@@ -2960,6 +2960,26 @@ func waitForReplicaSetPodsGone(c clientset.Interface, rs *extensions.ReplicaSet)
 	})
 }
+// WaitForAvailableReplicaSet waits until the replica set has all of its replicas available
+// and is observed by the replica set controller.
+func WaitForAvailableReplicaSet(c clientset.Interface, ns, name string) error {
+	var status extensions.ReplicaSetStatus
+	err := wait.PollImmediate(Poll, 5*time.Minute, func() (bool, error) {
+		rs, err := c.Extensions().ReplicaSets(ns).Get(name)
+		if err != nil {
+			return false, err
+		}
+		status = rs.Status
+		return rs.Status.ObservedGeneration >= rs.Generation &&
+			*(rs.Spec.Replicas) == rs.Status.Replicas &&
+			*(rs.Spec.Replicas) == rs.Status.AvailableReplicas, nil
+	})
+	if err == wait.ErrWaitTimeout {
+		err = fmt.Errorf("replica set %q never became available: %#v", name, status)
+	}
+	return err
+}
 // Waits for the deployment status to become valid (i.e. max unavailable and max surge aren't violated anymore).
 // Note that the status should stay valid at all times unless shortly after a scaling event or the deployment is just created.
 // To verify that the deployment status is valid and wait for the rollout to finish, use WaitForDeploymentStatus instead.
@@ -2971,7 +2991,7 @@ func WaitForDeploymentStatusValid(c clientset.Interface, d *extensions.Deploymen
 		reason string
 	)
-	err := wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
+	err := wait.PollImmediate(Poll, 5*time.Minute, func() (bool, error) {
 		var err error
 		deployment, err = c.Extensions().Deployments(d.Namespace).Get(d.Name)
 		if err != nil {
@@ -3043,7 +3063,7 @@ func WaitForDeploymentStatus(c clientset.Interface, d *extensions.Deployment) er
 		deployment *extensions.Deployment
 	)
-	err := wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
+	err := wait.PollImmediate(Poll, 5*time.Minute, func() (bool, error) {
 		var err error
 		deployment, err = c.Extensions().Deployments(d.Namespace).Get(d.Name)
 		if err != nil {
@@ -3098,7 +3118,7 @@ func WaitForDeploymentStatus(c clientset.Interface, d *extensions.Deployment) er
 // WaitForDeploymentUpdatedReplicasLTE waits for given deployment to be observed by the controller and has at least a number of updatedReplicas
 func WaitForDeploymentUpdatedReplicasLTE(c clientset.Interface, ns, deploymentName string, minUpdatedReplicas int, desiredGeneration int64) error {
-	err := wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
+	err := wait.PollImmediate(Poll, 5*time.Minute, func() (bool, error) {
 		deployment, err := c.Extensions().Deployments(ns).Get(deploymentName)
 		if err != nil {
 			return false, err
@@ -3117,7 +3137,7 @@ func WaitForDeploymentUpdatedReplicasLTE(c clientset.Interface, ns, deploymentNa
 // WaitForDeploymentRollbackCleared waits for given deployment either started rolling back or doesn't need to rollback.
 // Note that rollback should be cleared shortly, so we only wait for 1 minute here to fail early.
 func WaitForDeploymentRollbackCleared(c clientset.Interface, ns, deploymentName string) error {
-	err := wait.Poll(Poll, 1*time.Minute, func() (bool, error) {
+	err := wait.PollImmediate(Poll, 1*time.Minute, func() (bool, error) {
 		deployment, err := c.Extensions().Deployments(ns).Get(deploymentName)
 		if err != nil {
 			return false, err
@@ -3139,7 +3159,7 @@ func WaitForDeploymentRollbackCleared(c clientset.Interface, ns, deploymentName
 func WaitForDeploymentRevisionAndImage(c clientset.Interface, ns, deploymentName string, revision, image string) error {
 	var deployment *extensions.Deployment
 	var newRS *extensions.ReplicaSet
-	err := wait.Poll(Poll, 1*time.Minute, func() (bool, error) {
+	err := wait.PollImmediate(Poll, 1*time.Minute, func() (bool, error) {
 		var err error
 		deployment, err = c.Extensions().Deployments(ns).Get(deploymentName)
 		if err != nil {
@@ -3171,7 +3191,7 @@ func WaitForDeploymentRevisionAndImage(c clientset.Interface, ns, deploymentName
 }
 func WaitForOverlappingAnnotationMatch(c clientset.Interface, ns, deploymentName, expected string) error {
-	return wait.Poll(Poll, 1*time.Minute, func() (bool, error) {
+	return wait.PollImmediate(Poll, 1*time.Minute, func() (bool, error) {
 		deployment, err := c.Extensions().Deployments(ns).Get(deploymentName)
 		if err != nil {
 			return false, err
@@ -3205,7 +3225,7 @@ func CheckNewRSAnnotations(c clientset.Interface, ns, deploymentName string, exp
 func WaitForPodsReady(c clientset.Interface, ns, name string, minReadySeconds int) error {
 	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
 	options := v1.ListOptions{LabelSelector: label.String()}
-	return wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
+	return wait.PollImmediate(Poll, 5*time.Minute, func() (bool, error) {
 		pods, err := c.Core().Pods(ns).List(options)
 		if err != nil {
 			return false, nil
@@ -3221,7 +3241,7 @@ func WaitForPodsReady(c clientset.Interface, ns, name string, minReadySeconds in
 
 // Waits for the deployment to clean up old rcs.
 func WaitForDeploymentOldRSsNum(c clientset.Interface, ns, deploymentName string, desiredRSNum int) error {
-	return wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
+	return wait.PollImmediate(Poll, 5*time.Minute, func() (bool, error) {
 		deployment, err := c.Extensions().Deployments(ns).Get(deploymentName)
 		if err != nil {
 			return false, err