Merge pull request #110256 from bobbypage/terminal-ready-condition

kubelet: Mark ready condition as false explicitly for terminal pods
Kubernetes Prow Robot 2022-06-08 20:07:42 -07:00 committed by GitHub
commit 226323178e
6 changed files with 199 additions and 16 deletions

View File

@@ -317,6 +317,12 @@ func IsPodReadyConditionTrue(status v1.PodStatus) bool {
return condition != nil && condition.Status == v1.ConditionTrue
}
// IsContainersReadyConditionTrue returns true if the containers ready condition is true; false otherwise.
func IsContainersReadyConditionTrue(status v1.PodStatus) bool {
condition := GetContainersReadyCondition(status)
return condition != nil && condition.Status == v1.ConditionTrue
}
// GetPodReadyCondition extracts the pod ready condition from the given status and returns that.
// Returns nil if the condition is not present.
func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition {
@@ -324,6 +330,13 @@ func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition {
return condition
}
// GetContainersReadyCondition extracts the containers ready condition from the given status and returns that.
// Returns nil if the condition is not present.
func GetContainersReadyCondition(status v1.PodStatus) *v1.PodCondition {
_, condition := GetPodCondition(&status, v1.ContainersReady)
return condition
}
// GetPodCondition extracts the provided condition from the given status and returns that.
// Returns -1 and nil if the condition is not present; otherwise returns the index and the located condition.
func GetPodCondition(status *v1.PodStatus, conditionType v1.PodConditionType) (int, *v1.PodCondition) {
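To show how the new helpers read alongside the existing ones, here is a minimal usage sketch; the status literal and the main wrapper are illustrative and not part of this change:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

func main() {
	// Illustrative status: both readiness-related conditions are present and true.
	status := v1.PodStatus{
		Phase: v1.PodRunning,
		Conditions: []v1.PodCondition{
			{Type: v1.ContainersReady, Status: v1.ConditionTrue},
			{Type: v1.PodReady, Status: v1.ConditionTrue},
		},
	}

	// GetContainersReadyCondition returns the condition (or nil when absent);
	// IsContainersReadyConditionTrue collapses that lookup into a bool.
	if cond := podutil.GetContainersReadyCondition(status); cond != nil {
		fmt.Println("ContainersReady status:", cond.Status)
	}
	fmt.Println("containers ready:", podutil.IsContainersReadyConditionTrue(status))
	fmt.Println("pod ready:", podutil.IsPodReadyConditionTrue(status))
}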

View File

@@ -904,3 +904,97 @@ func TestUpdatePodCondition(t *testing.T) {
assert.Equal(t, test.expected, resultStatus, test.desc)
}
}
func TestGetContainersReadyCondition(t *testing.T) {
time := metav1.Now()
containersReadyCondition := v1.PodCondition{
Type: v1.ContainersReady,
Status: v1.ConditionTrue,
Reason: "successfully",
Message: "sync pod successfully",
LastProbeTime: time,
LastTransitionTime: metav1.NewTime(time.Add(1000)),
}
tests := []struct {
desc string
podStatus v1.PodStatus
expectedCondition *v1.PodCondition
}{
{
desc: "containers ready condition exists",
podStatus: v1.PodStatus{
Conditions: []v1.PodCondition{containersReadyCondition},
},
expectedCondition: &containersReadyCondition,
},
{
desc: "containers ready condition does not exist",
podStatus: v1.PodStatus{
Conditions: []v1.PodCondition{},
},
expectedCondition: nil,
},
}
for _, test := range tests {
containersReadyCondition := GetContainersReadyCondition(test.podStatus)
assert.Equal(t, test.expectedCondition, containersReadyCondition, test.desc)
}
}
func TestIsContainersReadyConditionTrue(t *testing.T) {
time := metav1.Now()
tests := []struct {
desc string
podStatus v1.PodStatus
expected bool
}{
{
desc: "containers ready condition is true",
podStatus: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.ContainersReady,
Status: v1.ConditionTrue,
Reason: "successfully",
Message: "sync pod successfully",
LastProbeTime: time,
LastTransitionTime: metav1.NewTime(time.Add(1000)),
},
},
},
expected: true,
},
{
desc: "containers ready condition is false",
podStatus: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.ContainersReady,
Status: v1.ConditionFalse,
Reason: "successfully",
Message: "sync pod successfully",
LastProbeTime: time,
LastTransitionTime: metav1.NewTime(time.Add(1000)),
},
},
},
expected: false,
},
{
desc: "containers ready condition is empty",
podStatus: v1.PodStatus{
Conditions: []v1.PodCondition{},
},
expected: false,
},
}
for _, test := range tests {
isContainersReady := IsContainersReadyConditionTrue(test.podStatus)
assert.Equal(t, test.expected, isContainersReady, test.desc)
}
}

View File

@@ -20,7 +20,7 @@ import (
"fmt"
"strings"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)
@@ -29,6 +29,8 @@ const (
UnknownContainerStatuses = "UnknownContainerStatuses"
// PodCompleted says that all related containers have succeeded.
PodCompleted = "PodCompleted"
// PodFailed says that the pod has failed and as such the containers have failed.
PodFailed = "PodFailed"
// ContainersNotReady says that one or more containers are not ready.
ContainersNotReady = "ContainersNotReady"
// ContainersNotInitialized says that one or more init containers have not succeeded.
@@ -62,11 +64,12 @@ func GenerateContainersReadyCondition(spec *v1.PodSpec, containerStatuses []v1.C
// If all containers are known and succeeded, just return PodCompleted.
if podPhase == v1.PodSucceeded && len(unknownContainers) == 0 {
- return v1.PodCondition{
- Type: v1.ContainersReady,
- Status: v1.ConditionFalse,
- Reason: PodCompleted,
- }
+ return generateContainersReadyConditionForTerminalPhase(podPhase)
}
// If the pod phase is failed, explicitly set the ready condition to false for containers since they may be in the process of terminating.
if podPhase == v1.PodFailed {
return generateContainersReadyConditionForTerminalPhase(podPhase)
}
// Generate message for containers in unknown condition.
@@ -191,3 +194,33 @@ func GeneratePodInitializedCondition(spec *v1.PodSpec, containerStatuses []v1.Co
Status: v1.ConditionTrue,
}
}
func generateContainersReadyConditionForTerminalPhase(podPhase v1.PodPhase) v1.PodCondition {
condition := v1.PodCondition{
Type: v1.ContainersReady,
Status: v1.ConditionFalse,
}
if podPhase == v1.PodFailed {
condition.Reason = PodFailed
} else if podPhase == v1.PodSucceeded {
condition.Reason = PodCompleted
}
return condition
}
func generatePodReadyConditionForTerminalPhase(podPhase v1.PodPhase) v1.PodCondition {
condition := v1.PodCondition{
Type: v1.PodReady,
Status: v1.ConditionFalse,
}
if podPhase == v1.PodFailed {
condition.Reason = PodFailed
} else if podPhase == v1.PodSucceeded {
condition.Reason = PodCompleted
}
return condition
}
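A short sketch of what these two helpers return, assumed to sit in the same package (the wrapper function name is illustrative):

func exampleTerminalConditions() (v1.PodCondition, v1.PodCondition) {
	// Failed pod: Type=ContainersReady, Status=ConditionFalse, Reason=PodFailed.
	containersReady := generateContainersReadyConditionForTerminalPhase(v1.PodFailed)
	// Succeeded pod: Type=PodReady, Status=ConditionFalse, Reason=PodCompleted.
	podReady := generatePodReadyConditionForTerminalPhase(v1.PodSucceeded)
	return containersReady, podReady
}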

View File

@@ -867,6 +867,20 @@ func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningCon
}
}
// If the new phase is terminal, explicitly set the v1.PodReady and v1.ContainersReady conditions to false.
// The kubelet may take some time to reconcile the ready conditions on its own, so force them to false here
// to ensure it never reports a status update with a terminal pod phase and ready=true.
// See https://issues.k8s.io/108594 for more details.
if podutil.IsPodPhaseTerminal(newPodStatus.Phase) {
if podutil.IsPodReadyConditionTrue(newPodStatus) || podutil.IsContainersReadyConditionTrue(newPodStatus) {
containersReadyCondition := generateContainersReadyConditionForTerminalPhase(newPodStatus.Phase)
podutil.UpdatePodCondition(&newPodStatus, &containersReadyCondition)
podReadyCondition := generatePodReadyConditionForTerminalPhase(newPodStatus.Phase)
podutil.UpdatePodCondition(&newPodStatus, &podReadyCondition)
}
}
return newPodStatus
}
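The effect of the new block can be sketched on a stale status, i.e. a pod that reached a terminal phase while its ready conditions were still true. The helper below is hypothetical and only mirrors the guarded block above, assumed to live in the same package:

func exampleForceTerminalConditions() v1.PodStatus {
	status := v1.PodStatus{
		Phase: v1.PodFailed,
		Conditions: []v1.PodCondition{
			{Type: v1.PodReady, Status: v1.ConditionTrue},
			{Type: v1.ContainersReady, Status: v1.ConditionTrue},
		},
	}
	if podutil.IsPodPhaseTerminal(status.Phase) &&
		(podutil.IsPodReadyConditionTrue(status) || podutil.IsContainersReadyConditionTrue(status)) {
		containersReadyCondition := generateContainersReadyConditionForTerminalPhase(status.Phase)
		podutil.UpdatePodCondition(&status, &containersReadyCondition)
		podReadyCondition := generatePodReadyConditionForTerminalPhase(status.Phase)
		podutil.UpdatePodCondition(&status, &podReadyCondition)
	}
	// status now reports PodReady=False and ContainersReady=False, both with Reason=PodFailed.
	return status
}

This is also why the TestMergePodStatus expectations below gain an explicit ContainersReady=False entry.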

View File

@@ -1548,7 +1548,11 @@ func TestMergePodStatus(t *testing.T) {
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
- Status: v1.ConditionTrue,
+ Status: v1.ConditionFalse,
},
{
Type: v1.ContainersReady,
Status: v1.ConditionFalse,
},
{
Type: v1.PodScheduled,

View File

@@ -28,7 +28,11 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
watchtools "k8s.io/client-go/tools/watch"
"k8s.io/kubectl/pkg/util/podutils"
admissionapi "k8s.io/pod-security-admission/api"
"github.com/onsi/ginkgo"
@@ -40,6 +44,7 @@ import (
v1 "k8s.io/api/core/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/features"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
@@ -102,6 +107,34 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
framework.ExpectNoError(err)
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
defer ginkgo.GinkgoRecover()
w := &cache.ListWatch{
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(context.TODO(), options)
},
}
// Set up a watch to continuously monitor pod events and detect invalid pod status updates
_, err = watchtools.Until(ctx, list.ResourceVersion, w, func(event watch.Event) (bool, error) {
if pod, ok := event.Object.(*v1.Pod); ok {
if isPodStatusAffectedByIssue108594(pod) {
return false, fmt.Errorf("failing test due to detecting invalid pod status")
}
// The condition never returns true, so the watch runs until the test cancels the context
return false, nil
}
return false, nil
})
// Ignore the timeout error, since the context is cancelled explicitly and the watch condition never returns true
if err != nil && err != wait.ErrWaitTimeout {
framework.Failf("watch for invalid pod status failed: %v", err.Error())
}
}()
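isPodStatusAffectedByIssue108594 is defined elsewhere in this test file and is not part of this hunk. Assuming it flags exactly the inconsistency described in https://issues.k8s.io/108594, a check of that kind could look like the following sketch (the name and body are assumptions, not the real helper):

func isTerminalButStillReady(pod *v1.Pod) bool {
	// A pod in a terminal phase must not still report a true Ready condition.
	isTerminal := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
	return isTerminal && podutils.IsPodReady(pod)
}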
ginkgo.By("Verifying batch pods are running")
for _, pod := range list.Items {
if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
@@ -125,11 +158,6 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
for _, pod := range list.Items {
- if isPodStatusAffectedByIssue108594(&pod) {
- framework.Logf("Detected invalid pod state for pod %q: pod status: %+v", pod.Name, pod.Status)
- framework.Failf("failing test due to detecting invalid pod status")
- }
if kubelettypes.IsCriticalPod(&pod) {
if isPodShutdown(&pod) {
framework.Logf("Expecting critical pod to be running, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
@@ -157,10 +185,6 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
for _, pod := range list.Items {
- if isPodStatusAffectedByIssue108594(&pod) {
- framework.Logf("Detected invalid pod state for pod %q: pod status: %+v", pod.Name, pod.Status)
- framework.Failf("failing test due to detecting invalid pod status")
- }
if !isPodShutdown(&pod) {
framework.Logf("Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status %+v", pod.Name, pod.Status)
return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
@@ -171,6 +195,7 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
// Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods)
podStatusUpdateTimeout+(nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods),
pollInterval).Should(gomega.BeNil())
})
ginkgo.It("should be able to handle a cancelled shutdown", func() {