Merge pull request #126653 from zhifei92/fix-podstatus

Fix the issue of losing the Pending phase after a node restart: while a pod's regular init containers are re-executing after a reboot, the pod should report Pending rather than being treated as already initialized.
commit 1dd81aa1c9 by Kubernetes Prow Robot, 2024-11-07 21:06:54 +00:00, committed by GitHub
4 changed files with 283 additions and 42 deletions


@@ -1579,7 +1579,8 @@ func (kl *Kubelet) GetKubeletContainerLogs(ctx context.Context, podFullName, con
 // getPhase returns the phase of a pod given its container info.
 func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.PodPhase {
 	spec := pod.Spec
-	pendingInitialization := 0
+	pendingRestartableInitContainers := 0
+	pendingRegularInitContainers := 0
 	failedInitialization := 0
 	// regular init containers
@@ -1593,13 +1594,13 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod
 		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
 		if !ok {
-			pendingInitialization++
+			pendingRegularInitContainers++
 			continue
 		}
 		switch {
 		case containerStatus.State.Running != nil:
-			pendingInitialization++
+			pendingRegularInitContainers++
 		case containerStatus.State.Terminated != nil:
 			if containerStatus.State.Terminated.ExitCode != 0 {
 				failedInitialization++
@@ -1610,10 +1611,10 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod
 					failedInitialization++
 				}
 			} else {
-				pendingInitialization++
+				pendingRegularInitContainers++
 			}
 		default:
-			pendingInitialization++
+			pendingRegularInitContainers++
 		}
 	}
@@ -1624,38 +1625,40 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod
 	stopped := 0
 	succeeded := 0
-	// restartable init containers
-	for _, container := range spec.InitContainers {
-		if !kubetypes.IsRestartableInitContainer(&container) {
-			// Skip the regular init containers, as they have been handled above.
-			continue
-		}
-		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
-		if !ok {
-			unknown++
-			continue
-		}
-		switch {
-		case containerStatus.State.Running != nil:
-			if containerStatus.Started == nil || !*containerStatus.Started {
-				pendingInitialization++
-			}
-			running++
-		case containerStatus.State.Terminated != nil:
-			// Do nothing here, as terminated restartable init containers are not
-			// taken into account for the pod phase.
-		case containerStatus.State.Waiting != nil:
-			if containerStatus.LastTerminationState.Terminated != nil {
-				// Do nothing here, as terminated restartable init containers are not
-				// taken into account for the pod phase.
-			} else {
-				pendingInitialization++
-				waiting++
-			}
-		default:
-			pendingInitialization++
-			unknown++
-		}
-	}
+	if utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) {
+		// restartable init containers
+		for _, container := range spec.InitContainers {
+			if !kubetypes.IsRestartableInitContainer(&container) {
+				// Skip the regular init containers, as they have been handled above.
+				continue
+			}
+			containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
+			if !ok {
+				unknown++
+				continue
+			}
+			switch {
+			case containerStatus.State.Running != nil:
+				if containerStatus.Started == nil || !*containerStatus.Started {
+					pendingRestartableInitContainers++
+				}
+				running++
+			case containerStatus.State.Terminated != nil:
+				// Do nothing here, as terminated restartable init containers are not
+				// taken into account for the pod phase.
+			case containerStatus.State.Waiting != nil:
+				if containerStatus.LastTerminationState.Terminated != nil {
+					// Do nothing here, as terminated restartable init containers are not
+					// taken into account for the pod phase.
+				} else {
+					pendingRestartableInitContainers++
+					waiting++
+				}
+			default:
+				pendingRestartableInitContainers++
+				unknown++
+			}
+		}
+	}
@@ -1690,11 +1693,12 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod
 	}
 	switch {
-	case pendingInitialization > 0 &&
-		// This is needed to handle the case where the pod has been initialized but
-		// the restartable init containers are restarting and the pod should not be
-		// placed back into v1.PodPending since the regular containers have run.
-		!kubecontainer.HasAnyRegularContainerStarted(&spec, info):
+	case pendingRegularInitContainers > 0 ||
+		(pendingRestartableInitContainers > 0 &&
+			// This is needed to handle the case where the pod has been initialized but
+			// the restartable init containers are restarting and the pod should not be
+			// placed back into v1.PodPending since the regular containers have run.
+			!kubecontainer.HasAnyRegularContainerStarted(&spec, info)):
 		fallthrough
 	case waiting > 0:
 		klog.V(5).InfoS("Pod waiting > 0, pending")
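
Editor's note: to make the fixed condition concrete, here is a minimal, self-contained sketch of the Pending decision. The helper name and flattened parameters are illustrative assumptions, not the kubelet's actual API.

package main

import "fmt"

// podIsPending mirrors the fixed switch case above: a pending regular init
// container always keeps the pod Pending, while pending restartable (sidecar)
// init containers only do so if no regular container has ever started.
func podIsPending(pendingRegularInit, pendingRestartableInit int, anyRegularStarted bool) bool {
	if pendingRegularInit > 0 {
		return true
	}
	return pendingRestartableInit > 0 && !anyRegularStarted
}

func main() {
	// After a node reboot, a regular init container re-runs while a regular
	// container had already started before the reboot. The old single-counter
	// condition (pendingInitialization > 0 && !anyRegularStarted) evaluated to
	// false here, so the pod skipped Pending; the split counters fix that.
	fmt.Println(podIsPending(1, 0, true)) // true
}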


@@ -2408,7 +2408,8 @@ func TestPodPhaseWithRestartAlwaysInitContainers(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		statusInfo := append(test.pod.Status.InitContainerStatuses[:], test.pod.Status.ContainerStatuses[:]...)
+		statusInfo := test.pod.Status.InitContainerStatuses
+		statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...)
 		status := getPhase(test.pod, statusInfo, false)
 		assert.Equal(t, test.status, status, "[test %s]", test.test)
 	}
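
Editor's note on the append rewrite: assigning the result of append on a sliced expression to a fresh variable obscures slice ownership (linters such as gocritic's appendAssign check may flag it); the two-step form reads more clearly. As far as I can tell the change is stylistic: both forms can still share the status slice's backing array when spare capacity exists. A standalone sketch of that sharing gotcha, with hypothetical data:

package main

import "fmt"

func main() {
	// A slice with spare capacity, standing in for InitContainerStatuses.
	initStatuses := make([]string, 2, 4)
	initStatuses[0], initStatuses[1] = "init-a", "init-b"

	// Either append style reuses the spare capacity of initStatuses here,
	// so combined and initStatuses share one backing array.
	combined := append(initStatuses, "regular-a")

	// Mutating the combined slice is visible through the original slice.
	combined[0] = "mutated"
	fmt.Println(initStatuses[0]) // prints "mutated"
}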
@@ -2620,12 +2621,105 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) {
 	}
 	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)
 	for _, test := range tests {
-		statusInfo := append(test.pod.Status.InitContainerStatuses[:], test.pod.Status.ContainerStatuses[:]...)
+		statusInfo := test.pod.Status.InitContainerStatuses
+		statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...)
 		status := getPhase(test.pod, statusInfo, test.podIsTerminal)
 		assert.Equal(t, test.status, status, "[test %s]", test.test)
 	}
 }
 
+func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) {
+	desiredState := v1.PodSpec{
+		NodeName: "machine",
+		InitContainers: []v1.Container{
+			{Name: "containerX"},
+			{Name: "containerY", RestartPolicy: &containerRestartPolicyAlways},
+		},
+		Containers: []v1.Container{
+			{Name: "containerA"},
+		},
+		RestartPolicy: v1.RestartPolicyAlways,
+	}
+
+	tests := []struct {
+		pod    *v1.Pod
+		status v1.PodPhase
+		test   string
+	}{
+		{
+			&v1.Pod{
+				Spec: desiredState,
+				Status: v1.PodStatus{
+					InitContainerStatuses: []v1.ContainerStatus{
+						runningState("containerX"),
+						runningState("containerY"),
+					},
+					ContainerStatuses: []v1.ContainerStatus{
+						runningState("containerA"),
+					},
+				},
+			},
+			v1.PodPending,
+			"regular init container, restartable init container and regular container are all running",
+		},
+		{
+			&v1.Pod{
+				Spec: desiredState,
+				Status: v1.PodStatus{
+					InitContainerStatuses: []v1.ContainerStatus{
+						runningState("containerX"),
+						runningState("containerY"),
+					},
+					ContainerStatuses: []v1.ContainerStatus{
+						stoppedState("containerA"),
+					},
+				},
+			},
+			v1.PodPending,
+			"regular container is stopped, restartable init container and regular init container are both running",
+		},
+		{
+			&v1.Pod{
+				Spec: desiredState,
+				Status: v1.PodStatus{
+					InitContainerStatuses: []v1.ContainerStatus{
+						succeededState("containerX"),
+						runningState("containerY"),
+					},
+					ContainerStatuses: []v1.ContainerStatus{
+						stoppedState("containerA"),
+					},
+				},
+			},
+			v1.PodRunning,
+			"regular init container has succeeded, restartable init container is running, regular container is stopped",
+		},
+		{
+			&v1.Pod{
+				Spec: desiredState,
+				Status: v1.PodStatus{
+					InitContainerStatuses: []v1.ContainerStatus{
+						succeededState("containerX"),
+						runningState("containerY"),
+					},
+					ContainerStatuses: []v1.ContainerStatus{
+						runningState("containerA"),
+					},
+				},
+			},
+			v1.PodRunning,
+			"regular init container has succeeded, restartable init container and regular container are both running",
+		},
+	}
+	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)
+	for _, test := range tests {
+		statusInfo := test.pod.Status.InitContainerStatuses
+		statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...)
+		status := getPhase(test.pod, statusInfo, false)
+		assert.Equal(t, test.status, status, "[test %s]", test.test)
+	}
+}
+
 func TestPodPhaseWithRestartNever(t *testing.T) {
 	desiredState := v1.PodSpec{
 		NodeName: "machine",
@@ -2823,7 +2917,8 @@ func TestPodPhaseWithRestartNeverInitContainers(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		statusInfo := append(test.pod.Status.InitContainerStatuses[:], test.pod.Status.ContainerStatuses[:]...)
+		statusInfo := test.pod.Status.InitContainerStatuses
+		statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...)
 		status := getPhase(test.pod, statusInfo, false)
 		assert.Equal(t, test.status, status, "[test %s]", test.test)
 	}
@@ -3022,7 +3117,8 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) {
 	}
 	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)
 	for _, test := range tests {
-		statusInfo := append(test.pod.Status.InitContainerStatuses[:], test.pod.Status.ContainerStatuses[:]...)
+		statusInfo := test.pod.Status.InitContainerStatuses
+		statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...)
 		status := getPhase(test.pod, statusInfo, false)
 		assert.Equal(t, test.status, status, "[test %s]", test.test)
 	}


@@ -809,6 +809,18 @@ func WaitForPodContainerStarted(ctx context.Context, c clientset.Interface, name
 	})
 }
 
+// WaitForPodInitContainerStarted waits for the given Pod init container to start.
+func WaitForPodInitContainerStarted(ctx context.Context, c clientset.Interface, namespace, podName string, initContainerIndex int, timeout time.Duration) error {
+	conditionDesc := fmt.Sprintf("init container %d started", initContainerIndex)
+	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
+		if initContainerIndex > len(pod.Status.InitContainerStatuses)-1 {
+			return false, nil
+		}
+		initContainerStatus := pod.Status.InitContainerStatuses[initContainerIndex]
+		return *initContainerStatus.Started, nil
+	})
+}
+
 // WaitForPodFailedReason wait for pod failed reason in status, for example "SysctlForbidden".
 func WaitForPodFailedReason(ctx context.Context, c clientset.Interface, pod *v1.Pod, reason string, timeout time.Duration) error {
 	conditionDesc := fmt.Sprintf("failed with reason %s", reason)
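
Editor's note: a hedged usage sketch of the new helper, mirroring how the e2e test below calls it. ctx, f, and pod come from the surrounding e2e framework code; note the helper assumes Started is non-nil once the indexed status entry exists.

// Wait for init container 0 of the pod to report Started=true.
err = e2epod.WaitForPodInitContainerStarted(ctx, f.ClientSet, pod.Namespace, pod.Name, 0, f.Timeouts.PodStart)
framework.ExpectNoError(err)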


@@ -0,0 +1,129 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2enode
+
+import (
+	"context"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	"k8s.io/kubernetes/test/e2e/framework"
+	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+	admissionapi "k8s.io/pod-security-admission/api"
+)
+
+var _ = SIGDescribe(framework.WithSerial(), "Pods status phase", func() {
+	f := framework.NewDefaultFramework("pods-status-phase-test-serial")
+	addAfterEachForCleaningUpPods(f)
+	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+
+	ginkgo.It("should be pending during the execution of the init container after the node reboot", func(ctx context.Context) {
+		init := "init"
+		regular := "regular"
+
+		podLabels := map[string]string{
+			"test":      "pods-status-phase-test-serial",
+			"namespace": f.Namespace.Name,
+		}
+		pod := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:   "initialized-pod",
+				Labels: podLabels,
+			},
+			Spec: v1.PodSpec{
+				RestartPolicy: v1.RestartPolicyAlways,
+				InitContainers: []v1.Container{
+					{
+						Name:  init,
+						Image: busyboxImage,
+						Command: ExecCommand(init, execCommand{
+							Delay:    30,
+							ExitCode: 0,
+						}),
+					},
+				},
+				Containers: []v1.Container{
+					{
+						Name:  regular,
+						Image: busyboxImage,
+						Command: ExecCommand(regular, execCommand{
+							Delay:    300,
+							ExitCode: 0,
+						}),
+					},
+				},
+			},
+		}
+		preparePod(pod)
+
+		client := e2epod.NewPodClient(f)
+		pod = client.Create(ctx, pod)
+		ginkgo.By("Waiting for the pod's status to become Running")
+		err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace)
+		framework.ExpectNoError(err)
+
+		ginkgo.By("Getting the current pod sandbox ID")
+		rs, _, err := getCRIClient()
+		framework.ExpectNoError(err)
+		sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{
+			LabelSelector: podLabels,
+		})
+		framework.ExpectNoError(err)
+		gomega.Expect(sandboxes).To(gomega.HaveLen(1))
+		podSandboxID := sandboxes[0].Id
+
+		// We need to wait for the pod to be Running before simulating the node reboot,
+		// to avoid any unintended effects from the previous init container state.
+		// Simulate node reboot by restarting the kubelet and the pod sandbox.
+		ginkgo.By("Stopping the kubelet")
+		startKubelet := mustStopKubelet(ctx, f)
+		gomega.Eventually(ctx, func() bool {
+			return kubeletHealthCheck(kubeletHealthCheckURL)
+		}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
+		ginkgo.By("Stopping the pod sandbox")
+		err = rs.StopPodSandbox(ctx, podSandboxID)
+		framework.ExpectNoError(err)
+		ginkgo.By("Starting the kubelet")
+		startKubelet(ctx)
+		gomega.Eventually(ctx, func() bool {
+			return kubeletHealthCheck(kubeletHealthCheckURL)
+		}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
+
+		ginkgo.By("Waiting for the regular init container to be started after the node reboot")
+		err = e2epod.WaitForPodInitContainerStarted(ctx, f.ClientSet, pod.Namespace, pod.Name, 0, f.Timeouts.PodStart)
+		framework.ExpectNoError(err)
+
+		pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+		framework.ExpectNoError(err)
+		gomega.Expect(pod.Status.Phase == v1.PodPending).To(gomega.BeTrueBecause("pod should be pending during the execution of the init container after the node reboot"))
+
+		ginkgo.By("Parse the logs of the pod after the kubelet restart")
+		results := parseOutput(ctx, f, pod)
+		_, err = results.TimeOfStart(init)
+		framework.ExpectNoError(err, "After the node restarts, the init container should restart.")
+
+		ginkgo.By("Verifying that the pod fully starts")
+		err = e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace)
+		framework.ExpectNoError(err)
+	})
+})