mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #102821 from ehashman/phase-fix
Ensure kubelet statuses can handle loss of container runtime state
This commit is contained in:
commit
15d3c3a5e2
@ -1529,11 +1529,12 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
|
|||||||
spec := &pod.Spec
|
spec := &pod.Spec
|
||||||
allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
|
allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
|
||||||
s.Phase = getPhase(spec, allStatus)
|
s.Phase = getPhase(spec, allStatus)
|
||||||
|
klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "phase", s.Phase)
|
||||||
// Check for illegal phase transition
|
// Check for illegal phase transition
|
||||||
if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
|
if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
|
||||||
// API server shows terminal phase; transitions are not allowed
|
// API server shows terminal phase; transitions are not allowed
|
||||||
if s.Phase != pod.Status.Phase {
|
if s.Phase != pod.Status.Phase {
|
||||||
klog.ErrorS(nil, "Pod attempted illegal phase transition", "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
|
klog.ErrorS(nil, "Pod attempted illegal phase transition", "pod", klog.KObj(pod), "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
|
||||||
// Force back to phase from the API server
|
// Force back to phase from the API server
|
||||||
s.Phase = pod.Status.Phase
|
s.Phase = pod.Status.Phase
|
||||||
}
|
}
|
||||||
@ -1736,8 +1737,6 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
|||||||
oldStatus, found := oldStatuses[container.Name]
|
oldStatus, found := oldStatuses[container.Name]
|
||||||
if found {
|
if found {
|
||||||
if oldStatus.State.Terminated != nil {
|
if oldStatus.State.Terminated != nil {
|
||||||
// Do not update status on terminated init containers as
|
|
||||||
// they be removed at any time.
|
|
||||||
status = &oldStatus
|
status = &oldStatus
|
||||||
} else {
|
} else {
|
||||||
// Apply some values from the old statuses as the default values.
|
// Apply some values from the old statuses as the default values.
|
||||||
@ -1760,7 +1759,7 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// if no container is found, then assuming it should be waiting seems plausible, but the status code requires
|
// if no container is found, then assuming it should be waiting seems plausible, but the status code requires
|
||||||
// that a previous termination be present. If we're offline long enough (or something removed the container?), then
|
// that a previous termination be present. If we're offline long enough or something removed the container, then
|
||||||
// the previous termination may not be present. This next code block ensures that if the container was previously running
|
// the previous termination may not be present. This next code block ensures that if the container was previously running
|
||||||
// then when that container status disappears, we can infer that it terminated even if we don't know the status code.
|
// then when that container status disappears, we can infer that it terminated even if we don't know the status code.
|
||||||
// By setting the lasttermination state we are able to leave the container status waiting and present more accurate
|
// By setting the lasttermination state we are able to leave the container status waiting and present more accurate
|
||||||
@ -1779,21 +1778,17 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if pod.DeletionTimestamp == nil {
|
// If we're here, we know the pod was previously running, but doesn't have a terminated status. We will check now to
|
||||||
continue
|
// see if it's in a pending state.
|
||||||
}
|
|
||||||
|
|
||||||
// and if the pod itself is being deleted, then the CRI may have removed the container already and for whatever reason the kubelet missed the exit code
|
|
||||||
// (this seems not awesome). We know at this point that we will not be restarting the container.
|
|
||||||
status := statuses[container.Name]
|
status := statuses[container.Name]
|
||||||
// if the status we're about to write indicates the default, the Waiting status will force this pod back into Pending.
|
// If the status we're about to write indicates the default, the Waiting status will force this pod back into Pending.
|
||||||
// That isn't true, we know the pod is going away.
|
// That isn't true, we know the pod was previously running.
|
||||||
isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating
|
isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating
|
||||||
if hasInitContainers {
|
if hasInitContainers {
|
||||||
isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing
|
isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing
|
||||||
}
|
}
|
||||||
if !isDefaultWaitingStatus {
|
if !isDefaultWaitingStatus {
|
||||||
// we the status was written, don't override
|
// the status was written, don't override
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if status.LastTerminationState.Terminated != nil {
|
if status.LastTerminationState.Terminated != nil {
|
||||||
@ -1809,6 +1804,12 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
|
|||||||
Message: "The container could not be located when the pod was deleted. The container used to be Running",
|
Message: "The container could not be located when the pod was deleted. The container used to be Running",
|
||||||
ExitCode: 137,
|
ExitCode: 137,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the pod was not deleted, then it's been restarted. Increment restart count.
|
||||||
|
if pod.DeletionTimestamp == nil {
|
||||||
|
status.RestartCount += 1
|
||||||
|
}
|
||||||
|
|
||||||
statuses[container.Name] = status
|
statuses[container.Name] = status
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1782,6 +1782,22 @@ func failedState(cName string) v1.ContainerStatus {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func waitingWithLastTerminationUnknown(cName string, restartCount int32) v1.ContainerStatus {
|
||||||
|
return v1.ContainerStatus{
|
||||||
|
Name: cName,
|
||||||
|
State: v1.ContainerState{
|
||||||
|
Waiting: &v1.ContainerStateWaiting{Reason: "ContainerCreating"},
|
||||||
|
},
|
||||||
|
LastTerminationState: v1.ContainerState{
|
||||||
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
Reason: "ContainerStatusUnknown",
|
||||||
|
Message: "The container could not be located when the pod was deleted. The container used to be Running",
|
||||||
|
ExitCode: 137,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
RestartCount: restartCount,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPodPhaseWithRestartAlways(t *testing.T) {
|
func TestPodPhaseWithRestartAlways(t *testing.T) {
|
||||||
desiredState := v1.PodSpec{
|
desiredState := v1.PodSpec{
|
||||||
@ -2306,6 +2322,97 @@ func TestPodPhaseWithRestartOnFailure(t *testing.T) {
|
|||||||
// func TestPodPhaseWithRestartOnFailureInitContainers(t *testing.T) {
|
// func TestPodPhaseWithRestartOnFailureInitContainers(t *testing.T) {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
func TestConvertToAPIContainerStatuses(t *testing.T) {
|
||||||
|
desiredState := v1.PodSpec{
|
||||||
|
NodeName: "machine",
|
||||||
|
Containers: []v1.Container{
|
||||||
|
{Name: "containerA"},
|
||||||
|
{Name: "containerB"},
|
||||||
|
},
|
||||||
|
RestartPolicy: v1.RestartPolicyAlways,
|
||||||
|
}
|
||||||
|
now := metav1.Now()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
pod *v1.Pod
|
||||||
|
currentStatus *kubecontainer.PodStatus
|
||||||
|
previousStatus []v1.ContainerStatus
|
||||||
|
containers []v1.Container
|
||||||
|
hasInitContainers bool
|
||||||
|
isInitContainer bool
|
||||||
|
expected []v1.ContainerStatus
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "no current status, with previous statuses and deletion",
|
||||||
|
pod: &v1.Pod{
|
||||||
|
Spec: desiredState,
|
||||||
|
Status: v1.PodStatus{
|
||||||
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
|
runningState("containerA"),
|
||||||
|
runningState("containerB"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: "my-pod", DeletionTimestamp: &now},
|
||||||
|
},
|
||||||
|
currentStatus: &kubecontainer.PodStatus{},
|
||||||
|
previousStatus: []v1.ContainerStatus{
|
||||||
|
runningState("containerA"),
|
||||||
|
runningState("containerB"),
|
||||||
|
},
|
||||||
|
containers: desiredState.Containers,
|
||||||
|
// no init containers
|
||||||
|
// is not an init container
|
||||||
|
expected: []v1.ContainerStatus{
|
||||||
|
waitingWithLastTerminationUnknown("containerA", 0),
|
||||||
|
waitingWithLastTerminationUnknown("containerB", 0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no current status, with previous statuses and no deletion",
|
||||||
|
pod: &v1.Pod{
|
||||||
|
Spec: desiredState,
|
||||||
|
Status: v1.PodStatus{
|
||||||
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
|
runningState("containerA"),
|
||||||
|
runningState("containerB"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
currentStatus: &kubecontainer.PodStatus{},
|
||||||
|
previousStatus: []v1.ContainerStatus{
|
||||||
|
runningState("containerA"),
|
||||||
|
runningState("containerB"),
|
||||||
|
},
|
||||||
|
containers: desiredState.Containers,
|
||||||
|
// no init containers
|
||||||
|
// is not an init container
|
||||||
|
expected: []v1.ContainerStatus{
|
||||||
|
waitingWithLastTerminationUnknown("containerA", 1),
|
||||||
|
waitingWithLastTerminationUnknown("containerB", 1),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
|
||||||
|
defer testKubelet.Cleanup()
|
||||||
|
kl := testKubelet.kubelet
|
||||||
|
containerStatuses := kl.convertToAPIContainerStatuses(
|
||||||
|
test.pod,
|
||||||
|
test.currentStatus,
|
||||||
|
test.previousStatus,
|
||||||
|
test.containers,
|
||||||
|
test.hasInitContainers,
|
||||||
|
test.isInitContainer,
|
||||||
|
)
|
||||||
|
for i, status := range containerStatuses {
|
||||||
|
assert.Equal(t, test.expected[i], status, "[test %s]", test.name)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetExec(t *testing.T) {
|
func TestGetExec(t *testing.T) {
|
||||||
const (
|
const (
|
||||||
podName = "podFoo"
|
podName = "podFoo"
|
||||||
|
Loading…
Reference in New Issue
Block a user