From ad3d8949f08766106c203e273b30c810994ebd99 Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Tue, 14 May 2019 18:04:56 -0400
Subject: [PATCH] kubelet: Preserve existing container status when pod terminated

The kubelet must not allow a container that was reported failed in a
restartPolicy=Never pod to be reported to the apiserver as success.

If a client deletes a restartPolicy=Never pod, the dispatchWork and
status manager race to update the container status. When dispatchWork
(specifically podIsTerminated) returns true, it means all containers are
stopped, which means the container status is accurate. However, the
TerminatePod method then clears this status. This results in a pod that
has been reported with status.phase=Failed getting reset to
status.phase=Succeeded, which is a violation of the guarantees around
terminal phase.

Ensure the Kubelet never reports that a container succeeded when it
hasn't run or been executed by guarding the terminate pod loop from
ever reporting 0 in the absence of container status.
---
 pkg/kubelet/kubelet.go                    |  3 ++-
 pkg/kubelet/status/BUILD                  |  1 +
 pkg/kubelet/status/status_manager.go      | 22 +++++++++++++--
 pkg/kubelet/status/status_manager_test.go | 33 ++++++++++++++++++++++-
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 2830f7c0709..c41098b94a8 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -2001,9 +2001,10 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
 }
 
 // dispatchWork starts the asynchronous sync of the pod in a pod worker.
-// If the pod is terminated, dispatchWork
+// If the pod is terminated, dispatchWork will perform no action.
 func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
 	if kl.podIsTerminated(pod) {
+		klog.V(4).Infof("Pod %q is terminated, ignoring remaining sync work: %s", format.Pod(pod), syncType)
 		if pod.DeletionTimestamp != nil {
 			// If the pod is in a terminated state, there is no pod worker to
 			// handle the work item. Check if the DeletionTimestamp has been
diff --git a/pkg/kubelet/status/BUILD b/pkg/kubelet/status/BUILD
index 0b590440e58..314a4065555 100644
--- a/pkg/kubelet/status/BUILD
+++ b/pkg/kubelet/status/BUILD
@@ -54,6 +54,7 @@ go_test(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
         "//staging/src/k8s.io/client-go/testing:go_default_library",
diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go
index d216a31e576..f42aed3f070 100644
--- a/pkg/kubelet/status/status_manager.go
+++ b/pkg/kubelet/status/status_manager.go
@@ -314,21 +314,39 @@ func findContainerStatus(status *v1.PodStatus, containerID string) (containerSta
 func (m *manager) TerminatePod(pod *v1.Pod) {
 	m.podStatusesLock.Lock()
 	defer m.podStatusesLock.Unlock()
+
+	// ensure that all containers have a terminated state - because we do not know whether the container
+	// was successful, always report an error
 	oldStatus := &pod.Status
 	if cachedStatus, ok := m.podStatuses[pod.UID]; ok {
 		oldStatus = &cachedStatus.status
 	}
 	status := *oldStatus.DeepCopy()
 	for i := range status.ContainerStatuses {
+		if status.ContainerStatuses[i].State.Terminated != nil || status.ContainerStatuses[i].State.Waiting != nil {
+			continue
+		}
 		status.ContainerStatuses[i].State = v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{},
+			Terminated: &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137,
+			},
 		}
 	}
 	for i := range status.InitContainerStatuses {
+		if status.InitContainerStatuses[i].State.Terminated != nil || status.InitContainerStatuses[i].State.Waiting != nil {
+			continue
+		}
 		status.InitContainerStatuses[i].State = v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{},
+			Terminated: &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137,
+			},
 		}
 	}
+
 	m.updateStatusInternal(pod, status, true)
 }
 
diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go
index f880a1e6947..eb3684ca87b 100644
--- a/pkg/kubelet/status/status_manager_test.go
+++ b/pkg/kubelet/status/status_manager_test.go
@@ -27,11 +27,12 @@ import (
 
 	"github.com/stretchr/testify/assert"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/util/diff"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
@@ -569,6 +570,16 @@ func TestTerminatePod(t *testing.T) {
 	t.Logf("update the pod's status to Failed. TerminatePod should preserve this status update.")
 	firstStatus := getRandomPodStatus()
 	firstStatus.Phase = v1.PodFailed
+	firstStatus.InitContainerStatuses = []v1.ContainerStatus{
+		{Name: "init-test-1"},
+		{Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}},
+		{Name: "init-test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 3}}},
+	}
+	firstStatus.ContainerStatuses = []v1.ContainerStatus{
+		{Name: "test-1"},
+		{Name: "test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 2}}},
+		{Name: "test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 0}}},
+	}
 	syncer.SetPodStatus(testPod, firstStatus)
 
 	t.Logf("set the testPod to a pod with Phase running, to simulate a stale pod")
@@ -586,6 +597,26 @@ func TestTerminatePod(t *testing.T) {
 		assert.False(t, newStatus.InitContainerStatuses[i].State.Terminated == nil, "expected init containers to be terminated")
 	}
 
+	expectUnknownState := v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "ContainerStatusUnknown", Message: "The container could not be located when the pod was terminated", ExitCode: 137}}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[0].State, expectUnknownState) {
+		t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.InitContainerStatuses[0].State, expectUnknownState))
+	}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[1].State, firstStatus.InitContainerStatuses[1].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[2].State, firstStatus.InitContainerStatuses[2].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[0].State, expectUnknownState) {
+		t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.ContainerStatuses[0].State, expectUnknownState))
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[1].State, firstStatus.ContainerStatuses[1].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[2].State, firstStatus.ContainerStatuses[2].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+
 	t.Logf("we expect the previous status update to be preserved.")
 	assert.Equal(t, newStatus.Phase, firstStatus.Phase)
 	assert.Equal(t, newStatus.Message, firstStatus.Message)
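
Illustration (not part of the patch): a minimal, standalone Go sketch of the defaulting rule the TerminatePod change introduces, assuming the k8s.io/api/core/v1 package is available on the module path. A container status that already carries a Terminated or Waiting state is preserved; any other status is forced to a terminated state with a non-zero exit code so the container can never be read back as having succeeded. The helper defaultToTerminatedUnknown and the main function are hypothetical names used only for this example.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// defaultToTerminatedUnknown mirrors the rule TerminatePod now applies:
// statuses that already have a Terminated or Waiting state are left alone,
// and everything else is forced to a terminated state with a non-zero exit
// code so the container is never reported as having succeeded.
func defaultToTerminatedUnknown(statuses []v1.ContainerStatus) {
	for i := range statuses {
		if statuses[i].State.Terminated != nil || statuses[i].State.Waiting != nil {
			continue
		}
		statuses[i].State = v1.ContainerState{
			Terminated: &v1.ContainerStateTerminated{
				Reason:   "ContainerStatusUnknown",
				Message:  "The container could not be located when the pod was terminated",
				ExitCode: 137,
			},
		}
	}
}

func main() {
	statuses := []v1.ContainerStatus{
		// No state recorded at all: gets the ContainerStatusUnknown default.
		{Name: "missing"},
		// Already terminated with a real exit code: preserved as-is.
		{Name: "failed", State: v1.ContainerState{
			Terminated: &v1.ContainerStateTerminated{Reason: "Error", ExitCode: 3},
		}},
	}
	defaultToTerminatedUnknown(statuses)
	for _, s := range statuses {
		fmt.Printf("%s: reason=%s exitCode=%d\n",
			s.Name, s.State.Terminated.Reason, s.State.Terminated.ExitCode)
	}
}

Exit code 137 follows the 128+9 (SIGKILL) convention, so a container whose final state could not be observed is reported as killed rather than as exit code 0, which is what keeps a Failed restartPolicy=Never pod from being reset to Succeeded.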