controller: support perma-failed deployments

This commit adds support for failing deployments based on a timeout
parameter defined in the spec. If there is no progress for the amount
of time defined as progressDeadlineSeconds then the deployment will be
marked as failed by adding a condition with a ProgressDeadlineExceeded
reason in it. Progress in the context of a deployment means the creation
or adoption of a new replica set, scaling up new pods, and scaling down
old pods.
This commit is contained in:
Michail Kargakis
2016-09-15 17:57:53 +02:00
parent c4ff44b66d
commit a5029bf373
10 changed files with 834 additions and 25 deletions

View File

@@ -70,8 +70,111 @@ const (
// TODO: Delete this annotation when we gracefully handle overlapping selectors.
// See https://github.com/kubernetes/kubernetes/issues/2210
SelectorUpdateAnnotation = "deployment.kubernetes.io/selector-updated-at"
// Reasons for deployment conditions
//
// Progressing:
//
// ReplicaSetUpdatedReason is added in a deployment when one of its replica sets is updated as part
// of the rollout process.
ReplicaSetUpdatedReason = "ReplicaSetUpdated"
// FailedRSCreateReason is added in a deployment when it cannot create a new replica set.
FailedRSCreateReason = "ReplicaSetCreateError"
// NewReplicaSetReason is added in a deployment when it creates a new replica set.
NewReplicaSetReason = "NewReplicaSetCreated"
// FoundNewRSReason is added in a deployment when it adopts an existing replica set.
FoundNewRSReason = "FoundNewReplicaSet"
// NewRSAvailableReason is added in a deployment when its newest replica set is made available
// ie. the number of new pods that have passed readiness checks and run for at least minReadySeconds
// is at least the minimum available pods that need to run for the deployment.
NewRSAvailableReason = "NewReplicaSetAvailable"
// TimedOutReason is added in a deployment when its newest replica set fails to show any progress
// within the given deadline (progressDeadlineSeconds).
TimedOutReason = "ProgressDeadlineExceeded"
// PausedDeployReason is added in a deployment when it is paused. Lack of progress shouldn't be
// estimated once a deployment is paused.
PausedDeployReason = "DeploymentPaused"
// ResumedDeployReason is added in a deployment when it is resumed. Useful for not failing accidentally
// deployments that paused amidst a rollout and are bounded by a deadline.
ResumedDeployReason = "DeploymentResumed"
//
// Available:
//
// MinimumReplicasAvailable is added in a deployment when it has its minimum replicas required available.
MinimumReplicasAvailable = "MinimumReplicasAvailable"
// MinimumReplicasUnavailable is added in a deployment when it doesn't have the minimum required replicas
// available.
MinimumReplicasUnavailable = "MinimumReplicasUnavailable"
)
// NewDeploymentCondition creates a new deployment condition.
func NewDeploymentCondition(condType extensions.DeploymentConditionType, status api.ConditionStatus, reason, message string) *extensions.DeploymentCondition {
return &extensions.DeploymentCondition{
Type: condType,
Status: status,
LastUpdateTime: unversioned.Now(),
LastTransitionTime: unversioned.Now(),
Reason: reason,
Message: message,
}
}
// GetDeploymentCondition returns the condition with the provided type.
func GetDeploymentCondition(status extensions.DeploymentStatus, condType extensions.DeploymentConditionType) *extensions.DeploymentCondition {
for i := range status.Conditions {
c := status.Conditions[i]
if c.Type == condType {
return &c
}
}
return nil
}
// SetDeploymentCondition updates the deployment to include the provided condition. If the condition that
// we are about to add already exists and has the same status and reason then we are not going to update.
func SetDeploymentCondition(status *extensions.DeploymentStatus, condition extensions.DeploymentCondition) {
currentCond := GetDeploymentCondition(*status, condition.Type)
if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
return
}
// Do not update lastTransitionTime if the status of the condition doesn't change.
if currentCond != nil && currentCond.Status == condition.Status {
condition.LastTransitionTime = currentCond.LastTransitionTime
}
newConditions := filterOutCondition(status.Conditions, condition.Type)
status.Conditions = append(newConditions, condition)
}
// RemoveDeploymentCondition removes the deployment condition with the provided type.
func RemoveDeploymentCondition(status *extensions.DeploymentStatus, condType extensions.DeploymentConditionType) {
status.Conditions = filterOutCondition(status.Conditions, condType)
}
// filterOutCondition returns a new slice of deployment conditions without conditions with the provided type.
func filterOutCondition(conditions []extensions.DeploymentCondition, condType extensions.DeploymentConditionType) []extensions.DeploymentCondition {
var newConditions []extensions.DeploymentCondition
for _, c := range conditions {
if c.Type == condType {
continue
}
newConditions = append(newConditions, c)
}
return newConditions
}
// ReplicaSetToDeploymentCondition converts a replica set condition into a deployment condition.
// Useful for promoting replica set failure conditions into deployments.
func ReplicaSetToDeploymentCondition(cond extensions.ReplicaSetCondition) extensions.DeploymentCondition {
return extensions.DeploymentCondition{
Type: extensions.DeploymentConditionType(cond.Type),
Status: cond.Status,
LastTransitionTime: cond.LastTransitionTime,
LastUpdateTime: cond.LastTransitionTime,
Reason: cond.Reason,
Message: cond.Message,
}
}
// SetDeploymentRevision updates the revision for a deployment.
func SetDeploymentRevision(deployment *extensions.Deployment, revision string) bool {
updated := false
@@ -696,6 +799,56 @@ func IsRollingUpdate(deployment *extensions.Deployment) bool {
return deployment.Spec.Strategy.Type == extensions.RollingUpdateDeploymentStrategyType
}
// DeploymentComplete considers a deployment to be complete once its desired replicas equals its
// updatedReplicas and it doesn't violate minimum availability.
func DeploymentComplete(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
return newStatus.UpdatedReplicas == deployment.Spec.Replicas &&
newStatus.AvailableReplicas >= deployment.Spec.Replicas-MaxUnavailable(*deployment)
}
// DeploymentProgressing reports progress for a deployment. Progress is estimated by comparing the
// current with the new status of the deployment that the controller is observing. The following
// algorithm is already used in the kubectl rolling updater to report lack of progress.
func DeploymentProgressing(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
oldStatus := deployment.Status
// Old replicas that need to be scaled down
oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas
newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas
return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) || (newStatusOldReplicas < oldStatusOldReplicas)
}
// used for unit testing
var nowFn = func() time.Time { return time.Now() }
// DeploymentTimedOut considers a deployment to have timed out once its condition that reports progress
// is older than progressDeadlineSeconds or a Progressing condition with a TimedOutReason reason already
// exists.
func DeploymentTimedOut(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
if deployment.Spec.ProgressDeadlineSeconds == nil {
return false
}
// Look for the Progressing condition. If it doesn't exist, we have no base to estimate progress.
// If it's already set with a TimedOutReason reason, we have already timed out, no need to check
// again.
condition := GetDeploymentCondition(*newStatus, extensions.DeploymentProgressing)
if condition == nil {
return false
}
if condition.Reason == TimedOutReason {
return true
}
// Look at the difference in seconds between now and the last time we reported any
// progress or tried to create a replica set, or resumed a paused deployment and
// compare against progressDeadlineSeconds.
from := condition.LastTransitionTime
delta := time.Duration(*deployment.Spec.ProgressDeadlineSeconds) * time.Second
return from.Add(delta).Before(nowFn())
}
// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
// When one of the followings is true, we're rolling out the deployment; otherwise, we're scaling it.
// 1) The new RS is saturated: newRS's replicas == deployment's replicas

View File

@@ -688,7 +688,6 @@ func TestResolveFenceposts(t *testing.T) {
}
func TestNewRSNewReplicas(t *testing.T) {
tests := []struct {
test string
strategyType extensions.DeploymentStrategyType
@@ -703,12 +702,12 @@ func TestNewRSNewReplicas(t *testing.T) {
1, 5, 1, 5,
},
{
"scale up - to depDeplicas",
"scale up - to depReplicas",
extensions.RollingUpdateDeploymentStrategyType,
6, 2, 10, 6,
},
{
"recreate - to depDeplicas",
"recreate - to depReplicas",
extensions.RecreateDeploymentStrategyType,
3, 1, 1, 3,
},
@@ -735,3 +734,373 @@ func TestNewRSNewReplicas(t *testing.T) {
}
}
}
var (
condProgressing = func() extensions.DeploymentCondition {
return extensions.DeploymentCondition{
Type: extensions.DeploymentProgressing,
Status: api.ConditionFalse,
Reason: "ForSomeReason",
}
}
condProgressing2 = func() extensions.DeploymentCondition {
return extensions.DeploymentCondition{
Type: extensions.DeploymentProgressing,
Status: api.ConditionTrue,
Reason: "BecauseItIs",
}
}
condAvailable = func() extensions.DeploymentCondition {
return extensions.DeploymentCondition{
Type: extensions.DeploymentAvailable,
Status: api.ConditionTrue,
Reason: "AwesomeController",
}
}
status = func() *extensions.DeploymentStatus {
return &extensions.DeploymentStatus{
Conditions: []extensions.DeploymentCondition{condProgressing(), condAvailable()},
}
}
)
func TestGetCondition(t *testing.T) {
exampleStatus := status()
tests := []struct {
name string
status extensions.DeploymentStatus
condType extensions.DeploymentConditionType
condStatus api.ConditionStatus
condReason string
expected bool
}{
{
name: "condition exists",
status: *exampleStatus,
condType: extensions.DeploymentAvailable,
expected: true,
},
{
name: "condition does not exist",
status: *exampleStatus,
condType: extensions.DeploymentReplicaFailure,
expected: false,
},
}
for _, test := range tests {
cond := GetDeploymentCondition(test.status, test.condType)
exists := cond != nil
if exists != test.expected {
t.Errorf("%s: expected condition to exist: %t, got: %t", test.name, test.expected, exists)
}
}
}
func TestSetCondition(t *testing.T) {
tests := []struct {
name string
status *extensions.DeploymentStatus
cond extensions.DeploymentCondition
expectedStatus *extensions.DeploymentStatus
}{
{
name: "set for the first time",
status: &extensions.DeploymentStatus{},
cond: condAvailable(),
expectedStatus: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condAvailable()}},
},
{
name: "simple set",
status: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing()}},
cond: condAvailable(),
expectedStatus: status(),
},
{
name: "overwrite",
status: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing()}},
cond: condProgressing2(),
expectedStatus: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing2()}},
},
}
for _, test := range tests {
SetDeploymentCondition(test.status, test.cond)
if !reflect.DeepEqual(test.status, test.expectedStatus) {
t.Errorf("%s: expected status: %v, got: %v", test.name, test.expectedStatus, test.status)
}
}
}
func TestRemoveCondition(t *testing.T) {
tests := []struct {
name string
status *extensions.DeploymentStatus
condType extensions.DeploymentConditionType
expectedStatus *extensions.DeploymentStatus
}{
{
name: "remove from empty status",
status: &extensions.DeploymentStatus{},
condType: extensions.DeploymentProgressing,
expectedStatus: &extensions.DeploymentStatus{},
},
{
name: "simple remove",
status: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing()}},
condType: extensions.DeploymentProgressing,
expectedStatus: &extensions.DeploymentStatus{},
},
{
name: "doesn't remove anything",
status: status(),
condType: extensions.DeploymentReplicaFailure,
expectedStatus: status(),
},
}
for _, test := range tests {
RemoveDeploymentCondition(test.status, test.condType)
if !reflect.DeepEqual(test.status, test.expectedStatus) {
t.Errorf("%s: expected status: %v, got: %v", test.name, test.expectedStatus, test.status)
}
}
}
func TestDeploymentComplete(t *testing.T) {
deployment := func(desired, current, updated, available, maxUnavailable int32) *extensions.Deployment {
return &extensions.Deployment{
Spec: extensions.DeploymentSpec{
Replicas: desired,
Strategy: extensions.DeploymentStrategy{
RollingUpdate: &extensions.RollingUpdateDeployment{
MaxUnavailable: intstr.FromInt(int(maxUnavailable)),
},
Type: extensions.RollingUpdateDeploymentStrategyType,
},
},
Status: extensions.DeploymentStatus{
Replicas: current,
UpdatedReplicas: updated,
AvailableReplicas: available,
},
}
}
tests := []struct {
name string
d *extensions.Deployment
expected bool
}{
{
name: "complete",
d: deployment(5, 5, 5, 4, 1),
expected: true,
},
{
name: "not complete",
d: deployment(5, 5, 5, 3, 1),
expected: false,
},
{
name: "complete #2",
d: deployment(5, 5, 5, 5, 0),
expected: true,
},
{
name: "not complete #2",
d: deployment(5, 5, 4, 5, 0),
expected: false,
},
}
for _, test := range tests {
t.Log(test.name)
if got, exp := DeploymentComplete(test.d, &test.d.Status), test.expected; got != exp {
t.Errorf("expected complete: %t, got: %t", exp, got)
}
}
}
func TestDeploymentProgressing(t *testing.T) {
deployment := func(current, updated int32) *extensions.Deployment {
return &extensions.Deployment{
Status: extensions.DeploymentStatus{
Replicas: current,
UpdatedReplicas: updated,
},
}
}
newStatus := func(current, updated int32) extensions.DeploymentStatus {
return extensions.DeploymentStatus{
Replicas: current,
UpdatedReplicas: updated,
}
}
tests := []struct {
name string
d *extensions.Deployment
newStatus extensions.DeploymentStatus
expected bool
}{
{
name: "progressing",
d: deployment(10, 4),
newStatus: newStatus(10, 6),
expected: true,
},
{
name: "not progressing",
d: deployment(10, 4),
newStatus: newStatus(10, 4),
expected: false,
},
{
name: "progressing #2",
d: deployment(10, 4),
newStatus: newStatus(8, 4),
expected: true,
},
{
name: "not progressing #2",
d: deployment(10, 7),
newStatus: newStatus(10, 6),
expected: false,
},
{
name: "progressing #3",
d: deployment(10, 4),
newStatus: newStatus(8, 8),
expected: true,
},
{
name: "not progressing #2",
d: deployment(10, 7),
newStatus: newStatus(10, 7),
expected: false,
},
}
for _, test := range tests {
t.Log(test.name)
if got, exp := DeploymentProgressing(test.d, &test.newStatus), test.expected; got != exp {
t.Errorf("expected progressing: %t, got: %t", exp, got)
}
}
}
func TestDeploymentTimedOut(t *testing.T) {
var (
null *int32
ten = int32(10)
)
timeFn := func(min, sec int) time.Time {
return time.Date(2016, 1, 1, 0, min, sec, 0, time.UTC)
}
deployment := func(condType extensions.DeploymentConditionType, status api.ConditionStatus, pds *int32, from time.Time) extensions.Deployment {
return extensions.Deployment{
Spec: extensions.DeploymentSpec{
ProgressDeadlineSeconds: pds,
},
Status: extensions.DeploymentStatus{
Conditions: []extensions.DeploymentCondition{
{
Type: condType,
Status: status,
LastTransitionTime: unversioned.Time{Time: from},
},
},
},
}
}
tests := []struct {
name string
d extensions.Deployment
nowFn func() time.Time
expected bool
}{
{
name: "no progressDeadlineSeconds specified - no timeout",
d: deployment(extensions.DeploymentProgressing, api.ConditionTrue, null, timeFn(1, 9)),
nowFn: func() time.Time { return timeFn(1, 20) },
expected: false,
},
{
name: "progressDeadlineSeconds: 10s, now - started => 00:01:20 - 00:01:09 => 11s",
d: deployment(extensions.DeploymentProgressing, api.ConditionTrue, &ten, timeFn(1, 9)),
nowFn: func() time.Time { return timeFn(1, 20) },
expected: true,
},
{
name: "progressDeadlineSeconds: 10s, now - started => 00:01:20 - 00:01:11 => 9s",
d: deployment(extensions.DeploymentProgressing, api.ConditionTrue, &ten, timeFn(1, 11)),
nowFn: func() time.Time { return timeFn(1, 20) },
expected: false,
},
}
for _, test := range tests {
t.Log(test.name)
nowFn = test.nowFn
if got, exp := DeploymentTimedOut(&test.d, &test.d.Status), test.expected; got != exp {
t.Errorf("expected timeout: %t, got: %t", exp, got)
}
}
}