Merge pull request #54530 from sjenning/validate-state-transition

Automatic merge from submit-queue (batch tested with PRs 56108, 56811, 57335, 57331, 54530). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

api: validate container phase transitions

https://github.com/kubernetes/kubernetes/issues/54499 exposed an issue where a container was transitioning from the terminal phases of `Succeeded` or `Failed` to `Pending`.  It is due to a bug in the kubelet, but additional validation in the API server can prevent this invalid phase transition from being accepted.

@smarterclayton @derekwaynecarr @dashpole @joelsmith @frobware 

I confirmed that the reproducer in https://github.com/kubernetes/kubernetes/issues/54499 does not work with this validation in place.  The erroneous kubelet status update is rejected:
```
status_manager.go:437] Failed to update status for pod "test_default(2f02ecdf-b92a-11e7-a2d0-1c1b0deeddfa)": Pod "test" is invalid: status.containerStatuses[0].state: Forbidden: may not be transitioned to non-terminated state
```

However, it only works to a point with this particular issue.  The termination hangs and eventually the resource is removed from etcd and the status update goes through because there is no old statuses to compare.  Not exactly sure how this happens since there is no pod in etcd anymore  ¯\\_(ツ)_/¯
This commit is contained in:
Kubernetes Submit Queue 2017-12-18 17:45:43 -08:00 committed by GitHub
commit 3e78c4904e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -3332,6 +3332,31 @@ func ValidatePodUpdate(newPod, oldPod *core.Pod) field.ErrorList {
return allErrs
}
// ValidateContainerStateTransition test to if any illegal container state transitions are being attempted
func ValidateContainerStateTransition(newStatuses, oldStatuses []core.ContainerStatus, fldpath *field.Path, restartPolicy core.RestartPolicy) field.ErrorList {
allErrs := field.ErrorList{}
// If we should always restart, containers are allowed to leave the terminated state
if restartPolicy == core.RestartPolicyAlways {
return allErrs
}
for i, oldStatus := range oldStatuses {
// Skip any container that is not terminated
if oldStatus.State.Terminated == nil {
continue
}
// Skip any container that failed but is allowed to restart
if oldStatus.State.Terminated.ExitCode != 0 && restartPolicy == core.RestartPolicyOnFailure {
continue
}
for _, newStatus := range newStatuses {
if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil {
allErrs = append(allErrs, field.Forbidden(fldpath.Index(i).Child("state"), "may not be transitioned to non-terminated state"))
}
}
}
return allErrs
}
// ValidatePodStatusUpdate tests to see if the update is legal for an end user to make. newPod is updated with fields
// that cannot be changed.
func ValidatePodStatusUpdate(newPod, oldPod *core.Pod) field.ErrorList {
@ -3339,10 +3364,16 @@ func ValidatePodStatusUpdate(newPod, oldPod *core.Pod) field.ErrorList {
allErrs := ValidateObjectMetaUpdate(&newPod.ObjectMeta, &oldPod.ObjectMeta, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"))...)
fldPath = field.NewPath("status")
if newPod.Spec.NodeName != oldPod.Spec.NodeName {
allErrs = append(allErrs, field.Forbidden(field.NewPath("status", "nodeName"), "may not be changed directly"))
allErrs = append(allErrs, field.Forbidden(fldPath.Child("nodeName"), "may not be changed directly"))
}
// If pod should not restart, make sure the status update does not transition
// any terminated containers to a non-terminated state.
allErrs = append(allErrs, ValidateContainerStateTransition(newPod.Status.ContainerStatuses, oldPod.Status.ContainerStatuses, fldPath.Child("containerStatuses"), oldPod.Spec.RestartPolicy)...)
allErrs = append(allErrs, ValidateContainerStateTransition(newPod.Status.InitContainerStatuses, oldPod.Status.InitContainerStatuses, fldPath.Child("initContainerStatuses"), oldPod.Spec.RestartPolicy)...)
// For status update we ignore changes to pod spec.
newPod.Spec = oldPod.Spec