From 7811d84fefaf0df1f68b7b94988a36c762c8e383 Mon Sep 17 00:00:00 2001
From: David Porter
Date: Fri, 27 May 2022 13:21:20 -0700
Subject: [PATCH 1/2] kubelet: Mark ready condition as false explicitly for
 terminal pods

Terminal pods may continue to report a ready condition of true because
there is a delay in reconciling the ready condition of the containers
from the runtime with the pod status. It should be invalid for kubelet
to report a terminal phase with a true ready condition. To fix the
issue, explicitly override the ready condition to false for terminal
pods during status updates.

Signed-off-by: David Porter
---
 pkg/api/v1/pod/util.go                    | 13 ++++
 pkg/api/v1/pod/util_test.go               | 94 +++++++++++++++++++++++
 pkg/kubelet/status/generate.go            | 45 +++++++++--
 pkg/kubelet/status/status_manager.go      | 14 ++++
 pkg/kubelet/status/status_manager_test.go |  6 +-
 5 files changed, 165 insertions(+), 7 deletions(-)

diff --git a/pkg/api/v1/pod/util.go b/pkg/api/v1/pod/util.go
index 4ae77d543a2..8bfc21a67f4 100644
--- a/pkg/api/v1/pod/util.go
+++ b/pkg/api/v1/pod/util.go
@@ -317,6 +317,12 @@ func IsPodReadyConditionTrue(status v1.PodStatus) bool {
 	return condition != nil && condition.Status == v1.ConditionTrue
 }
 
+// IsContainersReadyConditionTrue returns true if the containers ready condition of a pod is true; false otherwise.
+func IsContainersReadyConditionTrue(status v1.PodStatus) bool {
+	condition := GetContainersReadyCondition(status)
+	return condition != nil && condition.Status == v1.ConditionTrue
+}
+
 // GetPodReadyCondition extracts the pod ready condition from the given status and returns that.
 // Returns nil if the condition is not present.
 func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition {
@@ -324,6 +330,13 @@ func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition {
 	return condition
 }
 
+// GetContainersReadyCondition extracts the containers ready condition from the given status and returns that.
+// Returns nil if the condition is not present.
+func GetContainersReadyCondition(status v1.PodStatus) *v1.PodCondition {
+	_, condition := GetPodCondition(&status, v1.ContainersReady)
+	return condition
+}
+
 // GetPodCondition extracts the provided condition from the given status and returns that.
 // Returns nil and -1 if the condition is not present, and the index of the located condition.
 func GetPodCondition(status *v1.PodStatus, conditionType v1.PodConditionType) (int, *v1.PodCondition) {
diff --git a/pkg/api/v1/pod/util_test.go b/pkg/api/v1/pod/util_test.go
index 78b1b92f376..666f3aa0b5f 100644
--- a/pkg/api/v1/pod/util_test.go
+++ b/pkg/api/v1/pod/util_test.go
@@ -904,3 +904,97 @@ func TestUpdatePodCondition(t *testing.T) {
 		assert.Equal(t, test.expected, resultStatus, test.desc)
 	}
 }
+
+func TestGetContainersReadyCondition(t *testing.T) {
+	time := metav1.Now()
+
+	containersReadyCondition := v1.PodCondition{
+		Type:               v1.ContainersReady,
+		Status:             v1.ConditionTrue,
+		Reason:             "successfully",
+		Message:            "sync pod successfully",
+		LastProbeTime:      time,
+		LastTransitionTime: metav1.NewTime(time.Add(1000)),
+	}
+
+	tests := []struct {
+		desc              string
+		podStatus         v1.PodStatus
+		expectedCondition *v1.PodCondition
+	}{
+		{
+			desc: "containers ready condition exists",
+			podStatus: v1.PodStatus{
+				Conditions: []v1.PodCondition{containersReadyCondition},
+			},
+			expectedCondition: &containersReadyCondition,
+		},
+		{
+			desc: "containers ready condition does not exist",
+			podStatus: v1.PodStatus{
+				Conditions: []v1.PodCondition{},
+			},
+			expectedCondition: nil,
+		},
+	}
+
+	for _, test := range tests {
+		containersReadyCondition := GetContainersReadyCondition(test.podStatus)
+		assert.Equal(t, test.expectedCondition, containersReadyCondition, test.desc)
+	}
+}
+
+func TestIsContainersReadyConditionTrue(t *testing.T) {
+	time := metav1.Now()
+
+	tests := []struct {
+		desc      string
+		podStatus v1.PodStatus
+		expected  bool
+	}{
+		{
+			desc: "containers ready condition is true",
+			podStatus: v1.PodStatus{
+				Conditions: []v1.PodCondition{
+					{
+						Type:               v1.ContainersReady,
+						Status:             v1.ConditionTrue,
+						Reason:             "successfully",
+						Message:            "sync pod successfully",
+						LastProbeTime:      time,
+						LastTransitionTime: metav1.NewTime(time.Add(1000)),
+					},
+				},
+			},
+			expected: true,
+		},
+		{
+			desc: "containers ready condition is false",
+			podStatus: v1.PodStatus{
+				Conditions: []v1.PodCondition{
+					{
+						Type:               v1.ContainersReady,
+						Status:             v1.ConditionFalse,
+						Reason:             "successfully",
+						Message:            "sync pod successfully",
+						LastProbeTime:      time,
+						LastTransitionTime: metav1.NewTime(time.Add(1000)),
+					},
+				},
+			},
+			expected: false,
+		},
+		{
+			desc: "containers ready condition is empty",
+			podStatus: v1.PodStatus{
+				Conditions: []v1.PodCondition{},
+			},
+			expected: false,
+		},
+	}
+
+	for _, test := range tests {
+		isContainersReady := IsContainersReadyConditionTrue(test.podStatus)
+		assert.Equal(t, test.expected, isContainersReady, test.desc)
+	}
+}
diff --git a/pkg/kubelet/status/generate.go b/pkg/kubelet/status/generate.go
index ef87faa993c..024f1cf4bc2 100644
--- a/pkg/kubelet/status/generate.go
+++ b/pkg/kubelet/status/generate.go
@@ -20,7 +20,7 @@ import (
 	"fmt"
 	"strings"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 )
 
@@ -29,6 +29,8 @@ const (
 	UnknownContainerStatuses = "UnknownContainerStatuses"
 	// PodCompleted says that all related containers have succeeded.
 	PodCompleted = "PodCompleted"
+	// PodFailed says that the pod has failed and as such the containers have failed.
+	PodFailed = "PodFailed"
 	// ContainersNotReady says that one or more containers are not ready.
 	ContainersNotReady = "ContainersNotReady"
 	// ContainersNotInitialized says that one or more init containers have not succeeded.
@@ -62,11 +64,12 @@ func GenerateContainersReadyCondition(spec *v1.PodSpec, containerStatuses []v1.C
 	// If all containers are known and succeeded, just return PodCompleted.
 	if podPhase == v1.PodSucceeded && len(unknownContainers) == 0 {
-		return v1.PodCondition{
-			Type:   v1.ContainersReady,
-			Status: v1.ConditionFalse,
-			Reason: PodCompleted,
-		}
+		return generateContainersReadyConditionForTerminalPhase(podPhase)
+	}
+
+	// If the pod phase is failed, explicitly set the ready condition to false for containers since they may be in progress of terminating.
+	if podPhase == v1.PodFailed {
+		return generateContainersReadyConditionForTerminalPhase(podPhase)
 	}
 
 	// Generate message for containers in unknown condition.
@@ -191,3 +194,33 @@ func GeneratePodInitializedCondition(spec *v1.PodSpec, containerStatuses []v1.Co
 		Status: v1.ConditionTrue,
 	}
 }
+
+func generateContainersReadyConditionForTerminalPhase(podPhase v1.PodPhase) v1.PodCondition {
+	condition := v1.PodCondition{
+		Type:   v1.ContainersReady,
+		Status: v1.ConditionFalse,
+	}
+
+	if podPhase == v1.PodFailed {
+		condition.Reason = PodFailed
+	} else if podPhase == v1.PodSucceeded {
+		condition.Reason = PodCompleted
+	}
+
+	return condition
+}
+
+func generatePodReadyConditionForTerminalPhase(podPhase v1.PodPhase) v1.PodCondition {
+	condition := v1.PodCondition{
+		Type:   v1.PodReady,
+		Status: v1.ConditionFalse,
+	}
+
+	if podPhase == v1.PodFailed {
+		condition.Reason = PodFailed
+	} else if podPhase == v1.PodSucceeded {
+		condition.Reason = PodCompleted
+	}
+
+	return condition
+}
diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go
index 845137e0656..47036de00df 100644
--- a/pkg/kubelet/status/status_manager.go
+++ b/pkg/kubelet/status/status_manager.go
@@ -867,6 +867,20 @@ func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningCon
 		}
 	}
 
+	// If the new phase is terminal, explicitly set the ready condition to false for v1.PodReady and v1.ContainersReady.
+	// It may take some time for kubelet to reconcile the ready condition, so explicitly set ready conditions to false if the phase is terminal.
+	// This is done to ensure kubelet does not report a status update with terminal pod phase and ready=true.
+	// See https://issues.k8s.io/108594 for more details.
+	if podutil.IsPodPhaseTerminal(newPodStatus.Phase) {
+		if podutil.IsPodReadyConditionTrue(newPodStatus) || podutil.IsContainersReadyConditionTrue(newPodStatus) {
+			containersReadyCondition := generateContainersReadyConditionForTerminalPhase(newPodStatus.Phase)
+			podutil.UpdatePodCondition(&newPodStatus, &containersReadyCondition)
+
+			podReadyCondition := generatePodReadyConditionForTerminalPhase(newPodStatus.Phase)
+			podutil.UpdatePodCondition(&newPodStatus, &podReadyCondition)
+		}
+	}
+
 	return newPodStatus
 }
 
diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go
index 9ecdf67b300..938b1d6ea25 100644
--- a/pkg/kubelet/status/status_manager_test.go
+++ b/pkg/kubelet/status/status_manager_test.go
@@ -1548,7 +1548,11 @@ func TestMergePodStatus(t *testing.T) {
 				Conditions: []v1.PodCondition{
 					{
 						Type:   v1.PodReady,
-						Status: v1.ConditionTrue,
+						Status: v1.ConditionFalse,
+					},
+					{
+						Type:   v1.ContainersReady,
+						Status: v1.ConditionFalse,
 					},
 					{
 						Type:   v1.PodScheduled,

From b4b338d4ebe84eb7ccbd61f07ca6352ff8e16795 Mon Sep 17 00:00:00 2001
From: David Porter
Date: Mon, 6 Jun 2022 18:12:59 -0700
Subject: [PATCH 2/2] test: update graceful node shutdown e2e with watch

Use a watch to detect invalid pod status updates in the graceful node
shutdown node e2e test.
A watch captures all pod updates, whereas the previous logic polled the
API server and could miss some intermediate updates.

Signed-off-by: David Porter
---
 test/e2e_node/node_shutdown_linux_test.go | 43 ++++++++++++++++++-----
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/test/e2e_node/node_shutdown_linux_test.go b/test/e2e_node/node_shutdown_linux_test.go
index ccb31e37e5e..32ff9ccb549 100644
--- a/test/e2e_node/node_shutdown_linux_test.go
+++ b/test/e2e_node/node_shutdown_linux_test.go
@@ -28,7 +28,11 @@ import (
 
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/client-go/tools/cache"
+	watchtools "k8s.io/client-go/tools/watch"
 	"k8s.io/kubectl/pkg/util/podutils"
+
 	admissionapi "k8s.io/pod-security-admission/api"
 
 	"github.com/onsi/ginkgo"
@@ -40,6 +44,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	schedulingv1 "k8s.io/api/scheduling/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/features"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
@@ -102,6 +107,34 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 			framework.ExpectNoError(err)
 			framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
 
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+			go func() {
+				defer ginkgo.GinkgoRecover()
+				w := &cache.ListWatch{
+					WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
+						return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(context.TODO(), options)
+					},
+				}
+
+				// Setup watch to continuously monitor any pod events and detect invalid pod status updates
+				_, err = watchtools.Until(ctx, list.ResourceVersion, w, func(event watch.Event) (bool, error) {
+					if pod, ok := event.Object.(*v1.Pod); ok {
+						if isPodStatusAffectedByIssue108594(pod) {
+							return false, fmt.Errorf("failing test due to detecting invalid pod status")
+						}
+						// Watch will never terminate (only when the test ends due to context cancellation)
+						return false, nil
+					}
+					return false, nil
+				})
+
+				// Ignore timeout error since the context will be explicitly cancelled and the watch will never return true
+				if err != nil && err != wait.ErrWaitTimeout {
+					framework.Failf("watch for invalid pod status failed: %v", err.Error())
+				}
+			}()
+
 			ginkgo.By("Verifying batch pods are running")
 			for _, pod := range list.Items {
 				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
@@ -125,11 +158,6 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 				framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
 
 				for _, pod := range list.Items {
-					if isPodStatusAffectedByIssue108594(&pod) {
-						framework.Logf("Detected invalid pod state for pod %q: pod status: %+v", pod.Name, pod.Status)
-						framework.Failf("failing test due to detecting invalid pod status")
-					}
-
 					if kubelettypes.IsCriticalPod(&pod) {
 						if isPodShutdown(&pod) {
 							framework.Logf("Expecting critical pod to be running, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
@@ -157,10 +185,6 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 				framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
 
 				for _, pod := range list.Items {
-					if isPodStatusAffectedByIssue108594(&pod) {
-						framework.Logf("Detected invalid pod state for pod %q: pod status: %+v", pod.Name, pod.Status)
-						framework.Failf("failing test due to detecting invalid pod status")
-					}
 					if !isPodShutdown(&pod) {
 						framework.Logf("Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status %+v", pod.Name, pod.Status)
 						return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
@@ -171,6 +195,7 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 				// Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods)
 				podStatusUpdateTimeout+(nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods), pollInterval).Should(gomega.BeNil())
+
 		})
 
 		ginkgo.It("should be able to handle a cancelled shutdown", func() {
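
Reviewer note: isPodStatusAffectedByIssue108594 is defined elsewhere in node_shutdown_linux_test.go and is not part of this diff. A minimal, self-contained sketch of the invariant that both the status manager change and the e2e watch rely on might look like the following; the helper name and the standalone main function are illustrative assumptions, not the upstream implementation.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// violatesTerminalReadiness is a hypothetical check mirroring what the e2e
// watch looks for: a pod in a terminal phase (Succeeded or Failed) must not
// report the PodReady or ContainersReady condition as true.
func violatesTerminalReadiness(status v1.PodStatus) bool {
	if status.Phase != v1.PodSucceeded && status.Phase != v1.PodFailed {
		return false
	}
	for _, cond := range status.Conditions {
		if (cond.Type == v1.PodReady || cond.Type == v1.ContainersReady) &&
			cond.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}

func main() {
	// Example of the invalid state described in issue 108594: a terminal
	// phase combined with a true ready condition.
	bad := v1.PodStatus{
		Phase: v1.PodSucceeded,
		Conditions: []v1.PodCondition{
			{Type: v1.PodReady, Status: v1.ConditionTrue},
		},
	}
	fmt.Println("invalid terminal status:", violatesTerminalReadiness(bad)) // true
}

With the first patch applied, mergePodStatus forces both conditions to false for terminal phases, so a check like this should never fire for statuses published by kubelet.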