Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-22 19:31:44 +00:00)
kubelet: Preserve existing container status when pod terminated
The kubelet must not allow a container that was reported as failed in a restartPolicy=Never pod to be reported to the apiserver as a success. If a client deletes a restartPolicy=Never pod, dispatchWork and the status manager race to update the container status. When dispatchWork (specifically podIsTerminated) returns true, all containers are stopped, which means the container status it sees is accurate. However, the TerminatePod method then clears this status, so a pod that has already been reported with status.phase=Failed can be reset to status.phase=Succeeded, which violates the guarantees around terminal phase.

Ensure the kubelet never reports that a container succeeded when it has not run or completed, by guarding the terminate pod loop from ever reporting exit code 0 in the absence of a recorded container status.
commit ad3d8949f0
parent 6d98b0a0f4
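
The heart of the fix is in the TerminatePod hunk below: a container state that is already Terminated or Waiting is left untouched, and only a container with no recorded state is defaulted to a clearly labeled, non-zero terminated state. A minimal standalone sketch of that guard (the helper terminalStateFor and the main function are illustrative, not part of the commit):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// terminalStateFor mirrors the guard added to TerminatePod in the diff below:
// a state that is already Terminated or Waiting is preserved unchanged; only a
// container with no recorded state is defaulted, and the default carries a
// non-zero exit code so it can never be read as a success.
func terminalStateFor(state v1.ContainerState) v1.ContainerState {
	if state.Terminated != nil || state.Waiting != nil {
		return state
	}
	return v1.ContainerState{
		Terminated: &v1.ContainerStateTerminated{
			Reason:   "ContainerStatusUnknown",
			Message:  "The container could not be located when the pod was terminated",
			ExitCode: 137,
		},
	}
}

func main() {
	failed := v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Error", ExitCode: 3}}
	unknown := v1.ContainerState{}

	fmt.Println(terminalStateFor(failed).Terminated.ExitCode)  // 3: the real failure is preserved
	fmt.Println(terminalStateFor(unknown).Terminated.ExitCode) // 137: defaulted, never 0
}
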
@@ -2001,9 +2001,10 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
 }
 
 // dispatchWork starts the asynchronous sync of the pod in a pod worker.
-// If the pod is terminated, dispatchWork
+// If the pod is terminated, dispatchWork will perform no action.
 func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
 	if kl.podIsTerminated(pod) {
+		klog.V(4).Infof("Pod %q is terminated, ignoring remaining sync work: %s", format.Pod(pod), syncType)
 		if pod.DeletionTimestamp != nil {
 			// If the pod is in a terminated state, there is no pod worker to
 			// handle the work item. Check if the DeletionTimestamp has been
@@ -54,6 +54,7 @@ go_test(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
         "//staging/src/k8s.io/client-go/testing:go_default_library",
@@ -314,21 +314,39 @@ func findContainerStatus(status *v1.PodStatus, containerID string) (containerSta
 func (m *manager) TerminatePod(pod *v1.Pod) {
 	m.podStatusesLock.Lock()
 	defer m.podStatusesLock.Unlock()
+
+	// ensure that all containers have a terminated state - because we do not know whether the container
+	// was successful, always report an error
 	oldStatus := &pod.Status
 	if cachedStatus, ok := m.podStatuses[pod.UID]; ok {
 		oldStatus = &cachedStatus.status
 	}
 	status := *oldStatus.DeepCopy()
 	for i := range status.ContainerStatuses {
+		if status.ContainerStatuses[i].State.Terminated != nil || status.ContainerStatuses[i].State.Waiting != nil {
+			continue
+		}
 		status.ContainerStatuses[i].State = v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{},
+			Terminated: &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137,
+			},
 		}
 	}
 	for i := range status.InitContainerStatuses {
+		if status.InitContainerStatuses[i].State.Terminated != nil || status.InitContainerStatuses[i].State.Waiting != nil {
+			continue
+		}
 		status.InitContainerStatuses[i].State = v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{},
+			Terminated: &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137,
+			},
 		}
 	}
 
 	m.updateStatusInternal(pod, status, true)
 }
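
A note on the exit code used above: the commit does not state why 137 was chosen, but it is the conventional exit code for a container killed by SIGKILL (128 + signal 9), and any non-zero value satisfies the property the fix relies on, namely that a defaulted status can never look like a success. A trivial illustration of that convention (an assumption about the rationale, not something stated in the diff):

package main

import "fmt"

// 128 + signal number is the usual exit-code convention for a process killed by a
// signal; SIGKILL is signal 9, so a SIGKILL'd container reports 137.
const (
	signalBase = 128
	sigkill    = 9
)

func main() {
	fmt.Println(signalBase + sigkill) // 137
}
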
@@ -27,11 +27,12 @@ import (
 
 	"github.com/stretchr/testify/assert"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/util/diff"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
@@ -569,6 +570,16 @@ func TestTerminatePod(t *testing.T) {
 	t.Logf("update the pod's status to Failed. TerminatePod should preserve this status update.")
 	firstStatus := getRandomPodStatus()
 	firstStatus.Phase = v1.PodFailed
+	firstStatus.InitContainerStatuses = []v1.ContainerStatus{
+		{Name: "init-test-1"},
+		{Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}},
+		{Name: "init-test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 3}}},
+	}
+	firstStatus.ContainerStatuses = []v1.ContainerStatus{
+		{Name: "test-1"},
+		{Name: "test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 2}}},
+		{Name: "test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 0}}},
+	}
 	syncer.SetPodStatus(testPod, firstStatus)
 
 	t.Logf("set the testPod to a pod with Phase running, to simulate a stale pod")
@@ -586,6 +597,26 @@ func TestTerminatePod(t *testing.T) {
 		assert.False(t, newStatus.InitContainerStatuses[i].State.Terminated == nil, "expected init containers to be terminated")
 	}
 
+	expectUnknownState := v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "ContainerStatusUnknown", Message: "The container could not be located when the pod was terminated", ExitCode: 137}}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[0].State, expectUnknownState) {
+		t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.InitContainerStatuses[0].State, expectUnknownState))
+	}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[1].State, firstStatus.InitContainerStatuses[1].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[2].State, firstStatus.InitContainerStatuses[2].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[0].State, expectUnknownState) {
+		t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.ContainerStatuses[0].State, expectUnknownState))
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[1].State, firstStatus.ContainerStatuses[1].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[2].State, firstStatus.ContainerStatuses[2].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+
 	t.Logf("we expect the previous status update to be preserved.")
 	assert.Equal(t, newStatus.Phase, firstStatus.Phase)
 	assert.Equal(t, newStatus.Message, firstStatus.Message)
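
Taken together, the test pins down what the commit message calls the guarantees around terminal phase: once a pod has been reported as Failed or Succeeded, later updates must not change its phase, and already-terminated containers keep their recorded exit codes. A self-contained sketch of that invariant (the helper names are illustrative and not part of Kubernetes):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// isTerminal reports whether a pod phase is terminal (Failed or Succeeded).
func isTerminal(p v1.PodPhase) bool {
	return p == v1.PodFailed || p == v1.PodSucceeded
}

// transitionAllowed is an illustrative check of the terminal-phase guarantee:
// a terminal phase may only "transition" to itself.
func transitionAllowed(oldPhase, newPhase v1.PodPhase) bool {
	if isTerminal(oldPhase) {
		return oldPhase == newPhase
	}
	return true
}

func main() {
	// The regression this commit prevents: Failed being rewritten as Succeeded.
	fmt.Println(transitionAllowed(v1.PodFailed, v1.PodSucceeded)) // false
	fmt.Println(transitionAllowed(v1.PodRunning, v1.PodFailed))   // true
}
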