Retry replication controller rolling updates on version mismatch.

When kubectl does rolling updates of replication controllers, retry updates that
fail due to version mismatches (caused by concurrent updates by other clients).
These failed rolling updates were causing intermittent e2e test failures
(e.g. issue 5821).
This commit is contained in:
Quinton Hoole
2015-03-25 14:51:58 -07:00
parent dca645d416
commit 40e2eae5b4
6 changed files with 126 additions and 44 deletions

View File

@@ -19,6 +19,7 @@ package kubectl
import (
"fmt"
"strconv"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
@@ -79,7 +80,13 @@ func (precondition *ResizePrecondition) Validate(controller *api.ReplicationCont
}
// Resizer is the interface for changing the size of a named resource in a namespace.
type Resizer interface {
// NOTE(review): Resize is declared twice in this interface with different
// signatures (this line and the documented declaration below). Go does not
// allow duplicate method names in an interface, so this line is presumably a
// leftover of the previous signature — confirm and remove.
Resize(namespace, name string, preconditions *ResizePrecondition, newSize uint) (string, error)
// Resize resizes the named resource after checking preconditions. It optionally
// retries in the event of resource version mismatch (if retry is not nil),
// and optionally waits until the status of the resource matches newSize (if wait is not nil).
Resize(namespace, name string, newSize uint, preconditions *ResizePrecondition, retry, wait *RetryParams) error
// ResizeSimple does a simple one-shot attempt at resizing - not useful on its own, but
// a necessary building block for Resize.
ResizeSimple(namespace, name string, preconditions *ResizePrecondition, newSize uint) (string, error)
}
func ResizerFor(kind string, c client.Interface) (Resizer, error) {
@@ -94,10 +101,14 @@ type ReplicationControllerResizer struct {
client.Interface
}
// RetryParams describes a polling schedule: how often to poll and for how long
// before giving up. Resize hands both values to wait.Poll.
type RetryParams struct {
	// interval is the polling interval passed to wait.Poll.
	interval time.Duration
	// timeout is the timeout passed to wait.Poll.
	timeout time.Duration
}
// ResizeCondition is a closure around ResizeSimple that facilitates retries via util.wait
func ResizeCondition(r Resizer, precondition *ResizePrecondition, namespace, name string, count uint) wait.ConditionFunc {
return func() (bool, error) {
_, err := r.Resize(namespace, name, precondition, count)
_, err := r.ResizeSimple(namespace, name, precondition, count)
switch e, _ := err.(ControllerResizeError); err.(type) {
case nil:
return true, nil
@@ -110,19 +121,17 @@ func ResizeCondition(r Resizer, precondition *ResizePrecondition, namespace, nam
}
}
func (resize *ReplicationControllerResizer) Resize(namespace, name string, preconditions *ResizePrecondition, newSize uint) (string, error) {
rc := resize.ReplicationControllers(namespace)
func (resizer *ReplicationControllerResizer) ResizeSimple(namespace, name string, preconditions *ResizePrecondition, newSize uint) (string, error) {
rc := resizer.ReplicationControllers(namespace)
controller, err := rc.Get(name)
if err != nil {
return "", ControllerResizeError{ControllerResizeGetFailure, "Unknown", err}
}
if preconditions != nil {
if err := preconditions.Validate(controller); err != nil {
return "", err
}
}
controller.Spec.Replicas = int(newSize)
// TODO: do retry on 409 errors here?
if _, err := rc.Update(controller); err != nil {
@@ -131,3 +140,28 @@ func (resize *ReplicationControllerResizer) Resize(namespace, name string, preco
// TODO: do a better job of printing objects here.
return "resized", nil
}
// Resize sets the replica count of the named ReplicationController to newSize.
//
// The update is driven through wait.Poll using ResizeCondition:
//   - preconditions: when nil, ResizePrecondition{-1, ""} is used instead
//     (presumably a precondition that matches any controller — confirm against
//     ResizePrecondition.Validate).
//   - retry: polling interval and timeout for the update; when nil, a
//     1ms/1ms schedule is used, intended to yield a single, immediate attempt.
//   - waitForReplicas: when non-nil, after a successful update the call polls
//     client.ControllerHasDesiredReplicas on that schedule before returning.
//
// It returns the first error reported by either polling phase, or nil.
func (resizer *ReplicationControllerResizer) Resize(namespace, name string, newSize uint, preconditions *ResizePrecondition, retry, waitForReplicas *RetryParams) error {
	// Fill in defaults for the optional arguments.
	if retry == nil {
		retry = &RetryParams{interval: time.Millisecond, timeout: time.Millisecond}
	}
	if preconditions == nil {
		preconditions = &ResizePrecondition{-1, ""}
	}

	// Phase 1: apply the new size, polling on the retry schedule.
	err := wait.Poll(retry.interval, retry.timeout, ResizeCondition(resizer, preconditions, namespace, name, newSize))
	if err != nil {
		return err
	}

	// Phase 2 is optional: the caller did not ask to wait for the replicas.
	if waitForReplicas == nil {
		return nil
	}

	// Only namespace and name are set on the controller handed to the check.
	target := &api.ReplicationController{ObjectMeta: api.ObjectMeta{Namespace: namespace, Name: name}}
	hasDesired := client.ControllerHasDesiredReplicas(resizer, target)
	return wait.Poll(waitForReplicas.interval, waitForReplicas.timeout, hasDesired)
}