mirror of
https://github.com/k3s-io/kubernetes.git
synced 2026-01-04 23:17:50 +00:00
controller: support perma-failed deployments
This commit adds support for failing deployments based on a timeout parameter defined in the spec. If there is no progress for the amount of time defined as progressDeadlineSeconds then the deployment will be marked as failed by adding a condition with a ProgressDeadlineExceeded reason in it. Progress in the context of a deployment means the creation or adoption of a new replica set, scaling up new pods, and scaling down old pods.
This commit is contained in:
@@ -70,8 +70,111 @@ const (
|
||||
// TODO: Delete this annotation when we gracefully handle overlapping selectors.
|
||||
// See https://github.com/kubernetes/kubernetes/issues/2210
|
||||
SelectorUpdateAnnotation = "deployment.kubernetes.io/selector-updated-at"
|
||||
|
||||
// Reasons for deployment conditions
|
||||
//
|
||||
// Progressing:
|
||||
//
|
||||
// ReplicaSetUpdatedReason is added in a deployment when one of its replica sets is updated as part
|
||||
// of the rollout process.
|
||||
ReplicaSetUpdatedReason = "ReplicaSetUpdated"
|
||||
// FailedRSCreateReason is added in a deployment when it cannot create a new replica set.
|
||||
FailedRSCreateReason = "ReplicaSetCreateError"
|
||||
// NewReplicaSetReason is added in a deployment when it creates a new replica set.
|
||||
NewReplicaSetReason = "NewReplicaSetCreated"
|
||||
// FoundNewRSReason is added in a deployment when it adopts an existing replica set.
|
||||
FoundNewRSReason = "FoundNewReplicaSet"
|
||||
// NewRSAvailableReason is added in a deployment when its newest replica set is made available
|
||||
// ie. the number of new pods that have passed readiness checks and run for at least minReadySeconds
|
||||
// is at least the minimum available pods that need to run for the deployment.
|
||||
NewRSAvailableReason = "NewReplicaSetAvailable"
|
||||
// TimedOutReason is added in a deployment when its newest replica set fails to show any progress
|
||||
// within the given deadline (progressDeadlineSeconds).
|
||||
TimedOutReason = "ProgressDeadlineExceeded"
|
||||
// PausedDeployReason is added in a deployment when it is paused. Lack of progress shouldn't be
|
||||
// estimated once a deployment is paused.
|
||||
PausedDeployReason = "DeploymentPaused"
|
||||
// ResumedDeployReason is added in a deployment when it is resumed. Useful for not failing accidentally
|
||||
// deployments that paused amidst a rollout and are bounded by a deadline.
|
||||
ResumedDeployReason = "DeploymentResumed"
|
||||
//
|
||||
// Available:
|
||||
//
|
||||
// MinimumReplicasAvailable is added in a deployment when it has its minimum replicas required available.
|
||||
MinimumReplicasAvailable = "MinimumReplicasAvailable"
|
||||
// MinimumReplicasUnavailable is added in a deployment when it doesn't have the minimum required replicas
|
||||
// available.
|
||||
MinimumReplicasUnavailable = "MinimumReplicasUnavailable"
|
||||
)
|
||||
|
||||
// NewDeploymentCondition creates a new deployment condition.
|
||||
func NewDeploymentCondition(condType extensions.DeploymentConditionType, status api.ConditionStatus, reason, message string) *extensions.DeploymentCondition {
|
||||
return &extensions.DeploymentCondition{
|
||||
Type: condType,
|
||||
Status: status,
|
||||
LastUpdateTime: unversioned.Now(),
|
||||
LastTransitionTime: unversioned.Now(),
|
||||
Reason: reason,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
|
||||
// GetDeploymentCondition returns the condition with the provided type.
|
||||
func GetDeploymentCondition(status extensions.DeploymentStatus, condType extensions.DeploymentConditionType) *extensions.DeploymentCondition {
|
||||
for i := range status.Conditions {
|
||||
c := status.Conditions[i]
|
||||
if c.Type == condType {
|
||||
return &c
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetDeploymentCondition updates the deployment to include the provided condition. If the condition that
|
||||
// we are about to add already exists and has the same status and reason then we are not going to update.
|
||||
func SetDeploymentCondition(status *extensions.DeploymentStatus, condition extensions.DeploymentCondition) {
|
||||
currentCond := GetDeploymentCondition(*status, condition.Type)
|
||||
if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
|
||||
return
|
||||
}
|
||||
// Do not update lastTransitionTime if the status of the condition doesn't change.
|
||||
if currentCond != nil && currentCond.Status == condition.Status {
|
||||
condition.LastTransitionTime = currentCond.LastTransitionTime
|
||||
}
|
||||
newConditions := filterOutCondition(status.Conditions, condition.Type)
|
||||
status.Conditions = append(newConditions, condition)
|
||||
}
|
||||
|
||||
// RemoveDeploymentCondition removes the deployment condition with the provided type.
|
||||
func RemoveDeploymentCondition(status *extensions.DeploymentStatus, condType extensions.DeploymentConditionType) {
|
||||
status.Conditions = filterOutCondition(status.Conditions, condType)
|
||||
}
|
||||
|
||||
// filterOutCondition returns a new slice of deployment conditions without conditions with the provided type.
|
||||
func filterOutCondition(conditions []extensions.DeploymentCondition, condType extensions.DeploymentConditionType) []extensions.DeploymentCondition {
|
||||
var newConditions []extensions.DeploymentCondition
|
||||
for _, c := range conditions {
|
||||
if c.Type == condType {
|
||||
continue
|
||||
}
|
||||
newConditions = append(newConditions, c)
|
||||
}
|
||||
return newConditions
|
||||
}
|
||||
|
||||
// ReplicaSetToDeploymentCondition converts a replica set condition into a deployment condition.
|
||||
// Useful for promoting replica set failure conditions into deployments.
|
||||
func ReplicaSetToDeploymentCondition(cond extensions.ReplicaSetCondition) extensions.DeploymentCondition {
|
||||
return extensions.DeploymentCondition{
|
||||
Type: extensions.DeploymentConditionType(cond.Type),
|
||||
Status: cond.Status,
|
||||
LastTransitionTime: cond.LastTransitionTime,
|
||||
LastUpdateTime: cond.LastTransitionTime,
|
||||
Reason: cond.Reason,
|
||||
Message: cond.Message,
|
||||
}
|
||||
}
|
||||
|
||||
// SetDeploymentRevision updates the revision for a deployment.
|
||||
func SetDeploymentRevision(deployment *extensions.Deployment, revision string) bool {
|
||||
updated := false
|
||||
@@ -696,6 +799,56 @@ func IsRollingUpdate(deployment *extensions.Deployment) bool {
|
||||
return deployment.Spec.Strategy.Type == extensions.RollingUpdateDeploymentStrategyType
|
||||
}
|
||||
|
||||
// DeploymentComplete considers a deployment to be complete once its desired replicas equals its
|
||||
// updatedReplicas and it doesn't violate minimum availability.
|
||||
func DeploymentComplete(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
|
||||
return newStatus.UpdatedReplicas == deployment.Spec.Replicas &&
|
||||
newStatus.AvailableReplicas >= deployment.Spec.Replicas-MaxUnavailable(*deployment)
|
||||
}
|
||||
|
||||
// DeploymentProgressing reports progress for a deployment. Progress is estimated by comparing the
|
||||
// current with the new status of the deployment that the controller is observing. The following
|
||||
// algorithm is already used in the kubectl rolling updater to report lack of progress.
|
||||
func DeploymentProgressing(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
|
||||
oldStatus := deployment.Status
|
||||
|
||||
// Old replicas that need to be scaled down
|
||||
oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas
|
||||
newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas
|
||||
|
||||
return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) || (newStatusOldReplicas < oldStatusOldReplicas)
|
||||
}
|
||||
|
||||
// nowFn supplies the current time. It is a variable so that unit tests can
// substitute a fixed clock.
var nowFn = time.Now
|
||||
|
||||
// DeploymentTimedOut considers a deployment to have timed out once its condition that reports progress
|
||||
// is older than progressDeadlineSeconds or a Progressing condition with a TimedOutReason reason already
|
||||
// exists.
|
||||
func DeploymentTimedOut(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
|
||||
if deployment.Spec.ProgressDeadlineSeconds == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Look for the Progressing condition. If it doesn't exist, we have no base to estimate progress.
|
||||
// If it's already set with a TimedOutReason reason, we have already timed out, no need to check
|
||||
// again.
|
||||
condition := GetDeploymentCondition(*newStatus, extensions.DeploymentProgressing)
|
||||
if condition == nil {
|
||||
return false
|
||||
}
|
||||
if condition.Reason == TimedOutReason {
|
||||
return true
|
||||
}
|
||||
|
||||
// Look at the difference in seconds between now and the last time we reported any
|
||||
// progress or tried to create a replica set, or resumed a paused deployment and
|
||||
// compare against progressDeadlineSeconds.
|
||||
from := condition.LastTransitionTime
|
||||
delta := time.Duration(*deployment.Spec.ProgressDeadlineSeconds) * time.Second
|
||||
return from.Add(delta).Before(nowFn())
|
||||
}
|
||||
|
||||
// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
|
||||
// When one of the followings is true, we're rolling out the deployment; otherwise, we're scaling it.
|
||||
// 1) The new RS is saturated: newRS's replicas == deployment's replicas
|
||||
|
||||
@@ -688,7 +688,6 @@ func TestResolveFenceposts(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestNewRSNewReplicas(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
test string
|
||||
strategyType extensions.DeploymentStrategyType
|
||||
@@ -703,12 +702,12 @@ func TestNewRSNewReplicas(t *testing.T) {
|
||||
1, 5, 1, 5,
|
||||
},
|
||||
{
|
||||
"scale up - to depDeplicas",
|
||||
"scale up - to depReplicas",
|
||||
extensions.RollingUpdateDeploymentStrategyType,
|
||||
6, 2, 10, 6,
|
||||
},
|
||||
{
|
||||
"recreate - to depDeplicas",
|
||||
"recreate - to depReplicas",
|
||||
extensions.RecreateDeploymentStrategyType,
|
||||
3, 1, 1, 3,
|
||||
},
|
||||
@@ -735,3 +734,373 @@ func TestNewRSNewReplicas(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
condProgressing = func() extensions.DeploymentCondition {
|
||||
return extensions.DeploymentCondition{
|
||||
Type: extensions.DeploymentProgressing,
|
||||
Status: api.ConditionFalse,
|
||||
Reason: "ForSomeReason",
|
||||
}
|
||||
}
|
||||
|
||||
condProgressing2 = func() extensions.DeploymentCondition {
|
||||
return extensions.DeploymentCondition{
|
||||
Type: extensions.DeploymentProgressing,
|
||||
Status: api.ConditionTrue,
|
||||
Reason: "BecauseItIs",
|
||||
}
|
||||
}
|
||||
|
||||
condAvailable = func() extensions.DeploymentCondition {
|
||||
return extensions.DeploymentCondition{
|
||||
Type: extensions.DeploymentAvailable,
|
||||
Status: api.ConditionTrue,
|
||||
Reason: "AwesomeController",
|
||||
}
|
||||
}
|
||||
|
||||
status = func() *extensions.DeploymentStatus {
|
||||
return &extensions.DeploymentStatus{
|
||||
Conditions: []extensions.DeploymentCondition{condProgressing(), condAvailable()},
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
func TestGetCondition(t *testing.T) {
|
||||
exampleStatus := status()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
status extensions.DeploymentStatus
|
||||
condType extensions.DeploymentConditionType
|
||||
condStatus api.ConditionStatus
|
||||
condReason string
|
||||
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "condition exists",
|
||||
|
||||
status: *exampleStatus,
|
||||
condType: extensions.DeploymentAvailable,
|
||||
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "condition does not exist",
|
||||
|
||||
status: *exampleStatus,
|
||||
condType: extensions.DeploymentReplicaFailure,
|
||||
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
cond := GetDeploymentCondition(test.status, test.condType)
|
||||
exists := cond != nil
|
||||
if exists != test.expected {
|
||||
t.Errorf("%s: expected condition to exist: %t, got: %t", test.name, test.expected, exists)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetCondition(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
status *extensions.DeploymentStatus
|
||||
cond extensions.DeploymentCondition
|
||||
|
||||
expectedStatus *extensions.DeploymentStatus
|
||||
}{
|
||||
{
|
||||
name: "set for the first time",
|
||||
|
||||
status: &extensions.DeploymentStatus{},
|
||||
cond: condAvailable(),
|
||||
|
||||
expectedStatus: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condAvailable()}},
|
||||
},
|
||||
{
|
||||
name: "simple set",
|
||||
|
||||
status: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing()}},
|
||||
cond: condAvailable(),
|
||||
|
||||
expectedStatus: status(),
|
||||
},
|
||||
{
|
||||
name: "overwrite",
|
||||
|
||||
status: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing()}},
|
||||
cond: condProgressing2(),
|
||||
|
||||
expectedStatus: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing2()}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
SetDeploymentCondition(test.status, test.cond)
|
||||
if !reflect.DeepEqual(test.status, test.expectedStatus) {
|
||||
t.Errorf("%s: expected status: %v, got: %v", test.name, test.expectedStatus, test.status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveCondition(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
status *extensions.DeploymentStatus
|
||||
condType extensions.DeploymentConditionType
|
||||
|
||||
expectedStatus *extensions.DeploymentStatus
|
||||
}{
|
||||
{
|
||||
name: "remove from empty status",
|
||||
|
||||
status: &extensions.DeploymentStatus{},
|
||||
condType: extensions.DeploymentProgressing,
|
||||
|
||||
expectedStatus: &extensions.DeploymentStatus{},
|
||||
},
|
||||
{
|
||||
name: "simple remove",
|
||||
|
||||
status: &extensions.DeploymentStatus{Conditions: []extensions.DeploymentCondition{condProgressing()}},
|
||||
condType: extensions.DeploymentProgressing,
|
||||
|
||||
expectedStatus: &extensions.DeploymentStatus{},
|
||||
},
|
||||
{
|
||||
name: "doesn't remove anything",
|
||||
|
||||
status: status(),
|
||||
condType: extensions.DeploymentReplicaFailure,
|
||||
|
||||
expectedStatus: status(),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
RemoveDeploymentCondition(test.status, test.condType)
|
||||
if !reflect.DeepEqual(test.status, test.expectedStatus) {
|
||||
t.Errorf("%s: expected status: %v, got: %v", test.name, test.expectedStatus, test.status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeploymentComplete(t *testing.T) {
|
||||
deployment := func(desired, current, updated, available, maxUnavailable int32) *extensions.Deployment {
|
||||
return &extensions.Deployment{
|
||||
Spec: extensions.DeploymentSpec{
|
||||
Replicas: desired,
|
||||
Strategy: extensions.DeploymentStrategy{
|
||||
RollingUpdate: &extensions.RollingUpdateDeployment{
|
||||
MaxUnavailable: intstr.FromInt(int(maxUnavailable)),
|
||||
},
|
||||
Type: extensions.RollingUpdateDeploymentStrategyType,
|
||||
},
|
||||
},
|
||||
Status: extensions.DeploymentStatus{
|
||||
Replicas: current,
|
||||
UpdatedReplicas: updated,
|
||||
AvailableReplicas: available,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
d *extensions.Deployment
|
||||
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "complete",
|
||||
|
||||
d: deployment(5, 5, 5, 4, 1),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "not complete",
|
||||
|
||||
d: deployment(5, 5, 5, 3, 1),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "complete #2",
|
||||
|
||||
d: deployment(5, 5, 5, 5, 0),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "not complete #2",
|
||||
|
||||
d: deployment(5, 5, 4, 5, 0),
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Log(test.name)
|
||||
|
||||
if got, exp := DeploymentComplete(test.d, &test.d.Status), test.expected; got != exp {
|
||||
t.Errorf("expected complete: %t, got: %t", exp, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeploymentProgressing(t *testing.T) {
|
||||
deployment := func(current, updated int32) *extensions.Deployment {
|
||||
return &extensions.Deployment{
|
||||
Status: extensions.DeploymentStatus{
|
||||
Replicas: current,
|
||||
UpdatedReplicas: updated,
|
||||
},
|
||||
}
|
||||
}
|
||||
newStatus := func(current, updated int32) extensions.DeploymentStatus {
|
||||
return extensions.DeploymentStatus{
|
||||
Replicas: current,
|
||||
UpdatedReplicas: updated,
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
d *extensions.Deployment
|
||||
newStatus extensions.DeploymentStatus
|
||||
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "progressing",
|
||||
|
||||
d: deployment(10, 4),
|
||||
newStatus: newStatus(10, 6),
|
||||
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "not progressing",
|
||||
|
||||
d: deployment(10, 4),
|
||||
newStatus: newStatus(10, 4),
|
||||
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "progressing #2",
|
||||
|
||||
d: deployment(10, 4),
|
||||
newStatus: newStatus(8, 4),
|
||||
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "not progressing #2",
|
||||
|
||||
d: deployment(10, 7),
|
||||
newStatus: newStatus(10, 6),
|
||||
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "progressing #3",
|
||||
|
||||
d: deployment(10, 4),
|
||||
newStatus: newStatus(8, 8),
|
||||
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "not progressing #2",
|
||||
|
||||
d: deployment(10, 7),
|
||||
newStatus: newStatus(10, 7),
|
||||
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Log(test.name)
|
||||
|
||||
if got, exp := DeploymentProgressing(test.d, &test.newStatus), test.expected; got != exp {
|
||||
t.Errorf("expected progressing: %t, got: %t", exp, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeploymentTimedOut(t *testing.T) {
|
||||
var (
|
||||
null *int32
|
||||
ten = int32(10)
|
||||
)
|
||||
|
||||
timeFn := func(min, sec int) time.Time {
|
||||
return time.Date(2016, 1, 1, 0, min, sec, 0, time.UTC)
|
||||
}
|
||||
deployment := func(condType extensions.DeploymentConditionType, status api.ConditionStatus, pds *int32, from time.Time) extensions.Deployment {
|
||||
return extensions.Deployment{
|
||||
Spec: extensions.DeploymentSpec{
|
||||
ProgressDeadlineSeconds: pds,
|
||||
},
|
||||
Status: extensions.DeploymentStatus{
|
||||
Conditions: []extensions.DeploymentCondition{
|
||||
{
|
||||
Type: condType,
|
||||
Status: status,
|
||||
LastTransitionTime: unversioned.Time{Time: from},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
d extensions.Deployment
|
||||
nowFn func() time.Time
|
||||
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "no progressDeadlineSeconds specified - no timeout",
|
||||
|
||||
d: deployment(extensions.DeploymentProgressing, api.ConditionTrue, null, timeFn(1, 9)),
|
||||
nowFn: func() time.Time { return timeFn(1, 20) },
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "progressDeadlineSeconds: 10s, now - started => 00:01:20 - 00:01:09 => 11s",
|
||||
|
||||
d: deployment(extensions.DeploymentProgressing, api.ConditionTrue, &ten, timeFn(1, 9)),
|
||||
nowFn: func() time.Time { return timeFn(1, 20) },
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "progressDeadlineSeconds: 10s, now - started => 00:01:20 - 00:01:11 => 9s",
|
||||
|
||||
d: deployment(extensions.DeploymentProgressing, api.ConditionTrue, &ten, timeFn(1, 11)),
|
||||
nowFn: func() time.Time { return timeFn(1, 20) },
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Log(test.name)
|
||||
|
||||
nowFn = test.nowFn
|
||||
if got, exp := DeploymentTimedOut(&test.d, &test.d.Status), test.expected; got != exp {
|
||||
t.Errorf("expected timeout: %t, got: %t", exp, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user