mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 19:31:44 +00:00
Merge pull request #102821 from ehashman/phase-fix
Ensure kubelet statuses can handle loss of container runtime state
This commit is contained in:
commit
15d3c3a5e2
@ -1529,11 +1529,12 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
|
||||
spec := &pod.Spec
|
||||
allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
|
||||
s.Phase = getPhase(spec, allStatus)
|
||||
klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "phase", s.Phase)
|
||||
// Check for illegal phase transition
|
||||
if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
|
||||
// API server shows terminal phase; transitions are not allowed
|
||||
if s.Phase != pod.Status.Phase {
|
||||
klog.ErrorS(nil, "Pod attempted illegal phase transition", "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
|
||||
klog.ErrorS(nil, "Pod attempted illegal phase transition", "pod", klog.KObj(pod), "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
|
||||
// Force back to phase from the API server
|
||||
s.Phase = pod.Status.Phase
|
||||
}
|
||||
@ -1736,8 +1737,6 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
||||
oldStatus, found := oldStatuses[container.Name]
|
||||
if found {
|
||||
if oldStatus.State.Terminated != nil {
|
||||
// Do not update status on terminated init containers as
|
||||
// they be removed at any time.
|
||||
status = &oldStatus
|
||||
} else {
|
||||
// Apply some values from the old statuses as the default values.
|
||||
@ -1760,7 +1759,7 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
||||
continue
|
||||
}
|
||||
// if no container is found, then assuming it should be waiting seems plausible, but the status code requires
|
||||
// that a previous termination be present. If we're offline long enough (or something removed the container?), then
|
||||
// that a previous termination be present. If we're offline long enough or something removed the container, then
|
||||
// the previous termination may not be present. This next code block ensures that if the container was previously running
|
||||
// then when that container status disappears, we can infer that it terminated even if we don't know the status code.
|
||||
// By setting the lasttermination state we are able to leave the container status waiting and present more accurate
|
||||
@ -1779,21 +1778,17 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
||||
continue
|
||||
}
|
||||
|
||||
if pod.DeletionTimestamp == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// and if the pod itself is being deleted, then the CRI may have removed the container already and for whatever reason the kubelet missed the exit code
|
||||
// (this seems not awesome). We know at this point that we will not be restarting the container.
|
||||
// If we're here, we know the pod was previously running, but doesn't have a terminated status. We will check now to
|
||||
// see if it's in a pending state.
|
||||
status := statuses[container.Name]
|
||||
// if the status we're about to write indicates the default, the Waiting status will force this pod back into Pending.
|
||||
// That isn't true, we know the pod is going away.
|
||||
// If the status we're about to write indicates the default, the Waiting status will force this pod back into Pending.
|
||||
// That isn't true, we know the pod was previously running.
|
||||
isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating
|
||||
if hasInitContainers {
|
||||
isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing
|
||||
}
|
||||
if !isDefaultWaitingStatus {
|
||||
// we the status was written, don't override
|
||||
// the status was written, don't override
|
||||
continue
|
||||
}
|
||||
if status.LastTerminationState.Terminated != nil {
|
||||
@ -1809,6 +1804,12 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
||||
Message: "The container could not be located when the pod was deleted. The container used to be Running",
|
||||
ExitCode: 137,
|
||||
}
|
||||
|
||||
// If the pod was not deleted, then it's been restarted. Increment restart count.
|
||||
if pod.DeletionTimestamp == nil {
|
||||
status.RestartCount += 1
|
||||
}
|
||||
|
||||
statuses[container.Name] = status
|
||||
}
|
||||
|
||||
|
@ -1782,6 +1782,22 @@ func failedState(cName string) v1.ContainerStatus {
|
||||
},
|
||||
}
|
||||
}
|
||||
func waitingWithLastTerminationUnknown(cName string, restartCount int32) v1.ContainerStatus {
|
||||
return v1.ContainerStatus{
|
||||
Name: cName,
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{Reason: "ContainerCreating"},
|
||||
},
|
||||
LastTerminationState: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
Reason: "ContainerStatusUnknown",
|
||||
Message: "The container could not be located when the pod was deleted. The container used to be Running",
|
||||
ExitCode: 137,
|
||||
},
|
||||
},
|
||||
RestartCount: restartCount,
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodPhaseWithRestartAlways(t *testing.T) {
|
||||
desiredState := v1.PodSpec{
|
||||
@ -2306,6 +2322,97 @@ func TestPodPhaseWithRestartOnFailure(t *testing.T) {
|
||||
// func TestPodPhaseWithRestartOnFailureInitContainers(t *testing.T) {
|
||||
// }
|
||||
|
||||
func TestConvertToAPIContainerStatuses(t *testing.T) {
|
||||
desiredState := v1.PodSpec{
|
||||
NodeName: "machine",
|
||||
Containers: []v1.Container{
|
||||
{Name: "containerA"},
|
||||
{Name: "containerB"},
|
||||
},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
}
|
||||
now := metav1.Now()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
pod *v1.Pod
|
||||
currentStatus *kubecontainer.PodStatus
|
||||
previousStatus []v1.ContainerStatus
|
||||
containers []v1.Container
|
||||
hasInitContainers bool
|
||||
isInitContainer bool
|
||||
expected []v1.ContainerStatus
|
||||
}{
|
||||
{
|
||||
name: "no current status, with previous statuses and deletion",
|
||||
pod: &v1.Pod{
|
||||
Spec: desiredState,
|
||||
Status: v1.PodStatus{
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
runningState("containerA"),
|
||||
runningState("containerB"),
|
||||
},
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "my-pod", DeletionTimestamp: &now},
|
||||
},
|
||||
currentStatus: &kubecontainer.PodStatus{},
|
||||
previousStatus: []v1.ContainerStatus{
|
||||
runningState("containerA"),
|
||||
runningState("containerB"),
|
||||
},
|
||||
containers: desiredState.Containers,
|
||||
// no init containers
|
||||
// is not an init container
|
||||
expected: []v1.ContainerStatus{
|
||||
waitingWithLastTerminationUnknown("containerA", 0),
|
||||
waitingWithLastTerminationUnknown("containerB", 0),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "no current status, with previous statuses and no deletion",
|
||||
pod: &v1.Pod{
|
||||
Spec: desiredState,
|
||||
Status: v1.PodStatus{
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
runningState("containerA"),
|
||||
runningState("containerB"),
|
||||
},
|
||||
},
|
||||
},
|
||||
currentStatus: &kubecontainer.PodStatus{},
|
||||
previousStatus: []v1.ContainerStatus{
|
||||
runningState("containerA"),
|
||||
runningState("containerB"),
|
||||
},
|
||||
containers: desiredState.Containers,
|
||||
// no init containers
|
||||
// is not an init container
|
||||
expected: []v1.ContainerStatus{
|
||||
waitingWithLastTerminationUnknown("containerA", 1),
|
||||
waitingWithLastTerminationUnknown("containerB", 1),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
|
||||
defer testKubelet.Cleanup()
|
||||
kl := testKubelet.kubelet
|
||||
containerStatuses := kl.convertToAPIContainerStatuses(
|
||||
test.pod,
|
||||
test.currentStatus,
|
||||
test.previousStatus,
|
||||
test.containers,
|
||||
test.hasInitContainers,
|
||||
test.isInitContainer,
|
||||
)
|
||||
for i, status := range containerStatuses {
|
||||
assert.Equal(t, test.expected[i], status, "[test %s]", test.name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetExec(t *testing.T) {
|
||||
const (
|
||||
podName = "podFoo"
|
||||
|
Loading…
Reference in New Issue
Block a user