From 657ccc30995ec72535bdadab069ab2ced1ecc156 Mon Sep 17 00:00:00 2001
From: Gunju Kim
Date: Thu, 14 Nov 2024 20:31:20 +0900
Subject: [PATCH] Ensure that the pod has the proper phase upon re-initialization

This fixes pods with restartable init containers so that they report
the proper phase after the pod sandbox is re-created.

Currently, `runtime.PodStatus` cannot retrieve the active container
statuses, i.e. the container statuses associated with the current pod
sandbox. This change adds `ActiveContainerStatuses` to
`runtime.PodStatus` so that it can carry the container statuses of the
current pod sandbox, and fixes the kubelet to correctly set the pod
`Phase` to `Pending` when no active regular containers are present.
---
 pkg/kubelet/container/helpers.go              |  25 ++++
 pkg/kubelet/container/helpers_test.go         | 140 ++++++++++++++++++
 pkg/kubelet/container/runtime.go              |   2 +
 pkg/kubelet/kubelet_pods.go                   |   6 +-
 pkg/kubelet/kubelet_pods_test.go              | 108 +++++++++++---
 .../kuberuntime/kuberuntime_container.go      |  15 +-
 .../kuberuntime_container_linux_test.go       |   2 +-
 .../kuberuntime/kuberuntime_container_test.go |   2 +-
 .../kuberuntime/kuberuntime_manager.go        |  22 +--
 test/e2e_node/container_lifecycle_test.go     |  14 +-
 10 files changed, 291 insertions(+), 45 deletions(-)

diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go
index fb43305faf2..9e6780310e6 100644
--- a/pkg/kubelet/container/helpers.go
+++ b/pkg/kubelet/container/helpers.go
@@ -396,6 +396,8 @@ func MakePortMappings(container *v1.Container) (ports []PortMapping) {
 // HasAnyRegularContainerStarted returns true if any regular container has
 // started, which indicates all init containers have been initialized.
+// Deprecated: This function is not accurate when its pod sandbox is recreated.
+// Use HasAnyActiveRegularContainerStarted instead.
 func HasAnyRegularContainerStarted(spec *v1.PodSpec, statuses []v1.ContainerStatus) bool {
 	if len(statuses) == 0 {
 		return false
 	}
@@ -417,3 +419,26 @@ func HasAnyRegularContainerStarted(spec *v1.PodSpec, statuses []v1.ContainerStatus) bool {
 
 	return false
 }
+
+// HasAnyActiveRegularContainerStarted returns true if any regular container of
+// the current pod sandbox has started, which indicates all init containers
+// have been initialized.
+func HasAnyActiveRegularContainerStarted(spec *v1.PodSpec, podStatus *PodStatus) bool { + if podStatus == nil { + return false + } + + containerNames := sets.New[string]() + for _, c := range spec.Containers { + containerNames.Insert(c.Name) + } + + for _, status := range podStatus.ActiveContainerStatuses { + if !containerNames.Has(status.Name) { + continue + } + return true + } + + return false +} diff --git a/pkg/kubelet/container/helpers_test.go b/pkg/kubelet/container/helpers_test.go index d0af9cc2683..0a12a7118a8 100644 --- a/pkg/kubelet/container/helpers_test.go +++ b/pkg/kubelet/container/helpers_test.go @@ -27,6 +27,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" ) func TestEnvVarsToMap(t *testing.T) { @@ -989,3 +990,142 @@ func TestHashContainerWithoutResources(t *testing.T) { }) } } + +func TestHasAnyActiveRegularContainerStarted(t *testing.T) { + testCases := []struct { + desc string + spec *v1.PodSpec + podStatus *PodStatus + expected bool + }{ + { + desc: "pod has no active container", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: "init", + }, + }, + Containers: []v1.Container{ + { + Name: "regular", + }, + }, + }, + podStatus: &PodStatus{ + SandboxStatuses: []*runtimeapi.PodSandboxStatus{ + { + Id: "old", + State: runtimeapi.PodSandboxState_SANDBOX_NOTREADY, + }, + }, + }, + expected: false, + }, + { + desc: "pod is initializing", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: "init", + }, + }, + Containers: []v1.Container{ + { + Name: "regular", + }, + }, + }, + podStatus: &PodStatus{ + SandboxStatuses: []*runtimeapi.PodSandboxStatus{ + { + Id: "current", + State: runtimeapi.PodSandboxState_SANDBOX_READY, + }, + }, + ActiveContainerStatuses: []*Status{ + { + Name: "init", + State: ContainerStateRunning, + }, + }, + }, + expected: false, + }, + { + desc: "pod has initialized", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: "init", + }, + }, + Containers: []v1.Container{ + { + Name: "regular", + }, + }, + }, + podStatus: &PodStatus{ + SandboxStatuses: []*runtimeapi.PodSandboxStatus{ + { + Id: "current", + State: runtimeapi.PodSandboxState_SANDBOX_READY, + }, + }, + ActiveContainerStatuses: []*Status{ + { + Name: "init", + State: ContainerStateExited, + }, + { + Name: "regular", + State: ContainerStateRunning, + }, + }, + }, + expected: true, + }, + { + desc: "pod is re-initializing after the sandbox recreation", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: "init", + }, + }, + Containers: []v1.Container{ + { + Name: "regular", + }, + }, + }, + podStatus: &PodStatus{ + SandboxStatuses: []*runtimeapi.PodSandboxStatus{ + { + Id: "current", + State: runtimeapi.PodSandboxState_SANDBOX_READY, + }, + { + Id: "old", + State: runtimeapi.PodSandboxState_SANDBOX_NOTREADY, + }, + }, + ActiveContainerStatuses: []*Status{ + { + Name: "init", + State: ContainerStateRunning, + }, + }, + }, + expected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + actual := HasAnyActiveRegularContainerStarted(tc.spec, tc.podStatus) + assert.Equal(t, tc.expected, actual) + }) + } +} diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go index 87cad70bd8a..025579ab80e 100644 --- a/pkg/kubelet/container/runtime.go +++ b/pkg/kubelet/container/runtime.go @@ -321,6 +321,8 @@ type PodStatus struct { IPs []string // Status of containers in the 
pod. ContainerStatuses []*Status + // Statuses of containers of the active sandbox in the pod. + ActiveContainerStatuses []*Status // Status of the pod sandbox. // Only for kuberuntime now, other runtime may keep it nil. SandboxStatuses []*runtimeapi.PodSandboxStatus diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index b49afb38668..0e6b1186f75 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -1576,7 +1576,7 @@ func (kl *Kubelet) GetKubeletContainerLogs(ctx context.Context, podFullName, con } // getPhase returns the phase of a pod given its container info. -func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.PodPhase { +func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasInitialized bool) v1.PodPhase { spec := pod.Spec pendingRestartableInitContainers := 0 pendingRegularInitContainers := 0 @@ -1695,7 +1695,7 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod // This is needed to handle the case where the pod has been initialized but // the restartable init containers are restarting and the pod should not be // placed back into v1.PodPending since the regular containers have run. - !kubecontainer.HasAnyRegularContainerStarted(&spec, info)): + !podHasInitialized): fallthrough case waiting > 0: klog.V(5).InfoS("Pod waiting > 0, pending") @@ -1768,7 +1768,7 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus) // calculate the next phase and preserve reason allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) - s.Phase = getPhase(pod, allStatus, podIsTerminal) + s.Phase = getPhase(pod, allStatus, podIsTerminal, kubecontainer.HasAnyActiveRegularContainerStarted(&pod.Spec, podStatus)) klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "oldPhase", oldPodStatus.Phase, "phase", s.Phase) // Perform a three-way merge between the statuses from the status manager, diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 04ee389c6c5..f04425c2b9d 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -2306,7 +2306,7 @@ func TestPodPhaseWithRestartAlways(t *testing.T) { }, } for _, test := range tests { - status := getPhase(test.pod, test.pod.Status.ContainerStatuses, test.podIsTerminal) + status := getPhase(test.pod, test.pod.Status.ContainerStatuses, test.podIsTerminal, false) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -2410,7 +2410,7 @@ func TestPodPhaseWithRestartAlwaysInitContainers(t *testing.T) { for _, test := range tests { statusInfo := test.pod.Status.InitContainerStatuses statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...) 
- status := getPhase(test.pod, statusInfo, false) + status := getPhase(test.pod, statusInfo, false, false) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -2429,12 +2429,13 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { } tests := []struct { - pod *v1.Pod - podIsTerminal bool - status v1.PodPhase - test string + pod *v1.Pod + podIsTerminal bool + podHasInitialized bool + status v1.PodPhase + test string }{ - {&v1.Pod{Spec: desiredState, Status: v1.PodStatus{}}, false, v1.PodPending, "empty, waiting"}, + {&v1.Pod{Spec: desiredState, Status: v1.PodStatus{}}, false, false, v1.PodPending, "empty, waiting"}, { &v1.Pod{ Spec: desiredState, @@ -2445,6 +2446,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + false, v1.PodPending, "restartable init container running", }, @@ -2458,6 +2460,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + false, v1.PodPending, "restartable init container stopped", }, @@ -2471,6 +2474,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + false, v1.PodPending, "restartable init container waiting, terminated zero", }, @@ -2484,6 +2488,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + false, v1.PodPending, "restartable init container waiting, terminated non-zero", }, @@ -2497,6 +2502,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + false, v1.PodPending, "restartable init container waiting, not terminated", }, @@ -2513,6 +2519,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + true, v1.PodPending, "restartable init container started, 1/2 regular container running", }, @@ -2530,6 +2537,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + true, v1.PodRunning, "restartable init container started, all regular containers running", }, @@ -2547,6 +2555,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + true, v1.PodRunning, "restartable init container running, all regular containers running", }, @@ -2564,6 +2573,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + true, v1.PodRunning, "restartable init container stopped, all regular containers running", }, @@ -2581,6 +2591,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, false, + true, v1.PodRunning, "backoff crashloop restartable init container, all regular containers running", }, @@ -2598,6 +2609,7 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, true, + true, v1.PodSucceeded, "all regular containers succeeded and restartable init container failed with restart always, but the pod is terminal", }, @@ -2615,14 +2627,33 @@ func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { }, }, true, + true, v1.PodSucceeded, "all regular containers succeeded and restartable init container succeeded with restart always, but the pod is terminal", }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + runningState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + false, + false, + v1.PodPending, + "re-initializing the pod after the sandbox is recreated", + }, } 
for _, test := range tests { statusInfo := test.pod.Status.InitContainerStatuses statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...) - status := getPhase(test.pod, statusInfo, test.podIsTerminal) + status := getPhase(test.pod, statusInfo, test.podIsTerminal, test.podHasInitialized) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -2641,9 +2672,10 @@ func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) { } tests := []struct { - pod *v1.Pod - status v1.PodPhase - test string + pod *v1.Pod + podHasInitialized bool + status v1.PodPhase + test string }{ { &v1.Pod{ @@ -2658,6 +2690,7 @@ func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) { }, }, }, + false, v1.PodPending, "regular init containers, restartable init container and regular container are all running", }, @@ -2674,6 +2707,7 @@ func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) { }, }, }, + false, v1.PodPending, "regular containers is stopped, restartable init container and regular int container are both running", }, @@ -2690,6 +2724,24 @@ func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) { }, }, }, + false, + v1.PodPending, + "re-created sandbox: regular init container is succeeded, restartable init container is running, old regular containers is stopped", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + succeededState("containerX"), + runningState("containerY"), + }, + ContainerStatuses: []v1.ContainerStatus{ + stoppedState("containerA"), + }, + }, + }, + true, v1.PodRunning, "regular init container is succeeded, restartable init container is running, regular containers is stopped", }, @@ -2706,6 +2758,7 @@ func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) { }, }, }, + true, v1.PodRunning, "regular init container is succeeded, restartable init container and regular containers are both running", }, @@ -2713,7 +2766,7 @@ func TestPodPhaseWithRestartAlwaysAndPodHasRun(t *testing.T) { for _, test := range tests { statusInfo := test.pod.Status.InitContainerStatuses statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...) - status := getPhase(test.pod, statusInfo, false) + status := getPhase(test.pod, statusInfo, false, test.podHasInitialized) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -2813,7 +2866,7 @@ func TestPodPhaseWithRestartNever(t *testing.T) { }, } for _, test := range tests { - status := getPhase(test.pod, test.pod.Status.ContainerStatuses, false) + status := getPhase(test.pod, test.pod.Status.ContainerStatuses, false, false) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -2917,7 +2970,7 @@ func TestPodPhaseWithRestartNeverInitContainers(t *testing.T) { for _, test := range tests { statusInfo := test.pod.Status.InitContainerStatuses statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...) 
- status := getPhase(test.pod, statusInfo, false) + status := getPhase(test.pod, statusInfo, false, false) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -2936,11 +2989,12 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { } tests := []struct { - pod *v1.Pod - status v1.PodPhase - test string + pod *v1.Pod + podHasInitialized bool + status v1.PodPhase + test string }{ - {&v1.Pod{Spec: desiredState, Status: v1.PodStatus{}}, v1.PodPending, "empty, waiting"}, + {&v1.Pod{Spec: desiredState, Status: v1.PodStatus{}}, false, v1.PodPending, "empty, waiting"}, { &v1.Pod{ Spec: desiredState, @@ -2950,6 +3004,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + false, v1.PodPending, "restartable init container running", }, @@ -2962,6 +3017,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + false, v1.PodPending, "restartable init container stopped", }, @@ -2974,6 +3030,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + false, v1.PodPending, "restartable init container waiting, terminated zero", }, @@ -2986,6 +3043,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + false, v1.PodPending, "restartable init container waiting, terminated non-zero", }, @@ -2998,6 +3056,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + false, v1.PodPending, "restartable init container waiting, not terminated", }, @@ -3013,6 +3072,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodPending, "restartable init container started, one main container running", }, @@ -3029,6 +3089,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodRunning, "restartable init container started, main containers succeeded", }, @@ -3045,8 +3106,9 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodRunning, - "restartable init container running, main containers succeeded", + "restartable init container re-running, main containers succeeded", }, { &v1.Pod{ @@ -3061,6 +3123,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodSucceeded, "all containers succeeded", }, @@ -3077,6 +3140,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodSucceeded, "restartable init container terminated non-zero, main containers succeeded", }, @@ -3093,6 +3157,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodSucceeded, "backoff crashloop restartable init container, main containers succeeded", }, @@ -3109,6 +3174,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { }, }, }, + true, v1.PodSucceeded, "backoff crashloop with non-zero restartable init container, main containers succeeded", }, @@ -3116,7 +3182,7 @@ func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { for _, test := range tests { statusInfo := test.pod.Status.InitContainerStatuses statusInfo = append(statusInfo, test.pod.Status.ContainerStatuses...) 
- status := getPhase(test.pod, statusInfo, false) + status := getPhase(test.pod, statusInfo, false, test.podHasInitialized) assert.Equal(t, test.status, status, "[test %s]", test.test) } } @@ -3229,7 +3295,7 @@ func TestPodPhaseWithRestartOnFailure(t *testing.T) { }, } for _, test := range tests { - status := getPhase(test.pod, test.pod.Status.ContainerStatuses, false) + status := getPhase(test.pod, test.pod.Status.ContainerStatuses, false, false) assert.Equal(t, test.status, status, "[test %s]", test.test) } } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index b6406f776db..beefda9d369 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -605,17 +605,18 @@ func (m *kubeGenericRuntimeManager) convertToKubeContainerStatus(status *runtime } // getPodContainerStatuses gets all containers' statuses for the pod. -func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context, uid kubetypes.UID, name, namespace string) ([]*kubecontainer.Status, error) { +func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context, uid kubetypes.UID, name, namespace, activePodSandboxID string) ([]*kubecontainer.Status, []*kubecontainer.Status, error) { // Select all containers of the given pod. containers, err := m.runtimeService.ListContainers(ctx, &runtimeapi.ContainerFilter{ LabelSelector: map[string]string{kubelettypes.KubernetesPodUIDLabel: string(uid)}, }) if err != nil { klog.ErrorS(err, "ListContainers error") - return nil, err + return nil, nil, err } statuses := []*kubecontainer.Status{} + activeContainerStatuses := []*kubecontainer.Status{} // TODO: optimization: set maximum number of containers per container name to examine. for _, c := range containers { resp, err := m.runtimeService.ContainerStatus(ctx, c.Id, false) @@ -629,18 +630,22 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context, if err != nil { // Merely log this here; GetPodStatus will actually report the error out. 
klog.V(4).InfoS("ContainerStatus return error", "containerID", c.Id, "err", err) - return nil, err + return nil, nil, err } status := resp.GetStatus() if status == nil { - return nil, remote.ErrContainerStatusNil + return nil, nil, remote.ErrContainerStatusNil } cStatus := m.convertToKubeContainerStatus(status) statuses = append(statuses, cStatus) + if c.PodSandboxId == activePodSandboxID { + activeContainerStatuses = append(activeContainerStatuses, cStatus) + } } sort.Sort(containerStatusByCreated(statuses)) - return statuses, nil + sort.Sort(containerStatusByCreated(activeContainerStatuses)) + return statuses, activeContainerStatuses, nil } func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.Status { diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go index 4d5e75c793a..5c0549e20b0 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go @@ -1691,7 +1691,7 @@ func TestUpdatePodSandboxResources(t *testing.T) { assert.Len(t, fakeContainers, 1) ctx := context.Background() - _, err := m.getPodContainerStatuses(ctx, pod.UID, pod.Name, pod.Namespace) + _, _, err := m.getPodContainerStatuses(ctx, pod.UID, pod.Name, pod.Namespace, "") require.NoError(t, err) resourceConfig := &cm.ResourceConfig{} diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_test.go index ee1a1bbe48b..e22205f4eda 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_test.go @@ -959,7 +959,7 @@ func TestUpdateContainerResources(t *testing.T) { assert.Len(t, fakeContainers, 1) ctx := context.Background() - cStatus, err := m.getPodContainerStatuses(ctx, pod.UID, pod.Name, pod.Namespace) + cStatus, _, err := m.getPodContainerStatuses(ctx, pod.UID, pod.Name, pod.Namespace, "") assert.NoError(t, err) containerID := cStatus[0].ID diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 68d6129a7cf..a3afd065ea0 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -1642,9 +1642,11 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety sandboxStatuses := []*runtimeapi.PodSandboxStatus{} containerStatuses := []*kubecontainer.Status{} + activeContainerStatuses := []*kubecontainer.Status{} timestamp := time.Now() podIPs := []string{} + var activePodSandboxID string for idx, podSandboxID := range podSandboxIDs { resp, err := m.runtimeService.PodSandboxStatus(ctx, podSandboxID, false) // Between List (getSandboxIDByPodUID) and check (PodSandboxStatus) another thread might remove a container, and that is normal. @@ -1666,6 +1668,7 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety // Only get pod IP from latest sandbox if idx == 0 && resp.Status.State == runtimeapi.PodSandboxState_SANDBOX_READY { podIPs = m.determinePodSandboxIPs(namespace, name, resp.Status) + activePodSandboxID = podSandboxID } if idx == 0 && utilfeature.DefaultFeatureGate.Enabled(features.EventedPLEG) { @@ -1676,7 +1679,7 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety // features gate enabled, which includes Evented PLEG, but uses the // runtime without Evented PLEG support. 
klog.V(4).InfoS("Runtime does not set pod status timestamp", "pod", klog.KObj(pod)) - containerStatuses, err = m.getPodContainerStatuses(ctx, uid, name, namespace) + containerStatuses, activeContainerStatuses, err = m.getPodContainerStatuses(ctx, uid, name, namespace, activePodSandboxID) if err != nil { if m.logReduction.ShouldMessageBePrinted(err.Error(), podFullName) { klog.ErrorS(err, "getPodContainerStatuses for pod failed", "pod", klog.KObj(pod)) @@ -1697,7 +1700,7 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety if !utilfeature.DefaultFeatureGate.Enabled(features.EventedPLEG) { // Get statuses of all containers visible in the pod. - containerStatuses, err = m.getPodContainerStatuses(ctx, uid, name, namespace) + containerStatuses, activeContainerStatuses, err = m.getPodContainerStatuses(ctx, uid, name, namespace, activePodSandboxID) if err != nil { if m.logReduction.ShouldMessageBePrinted(err.Error(), podFullName) { klog.ErrorS(err, "getPodContainerStatuses for pod failed", "pod", klog.KObj(pod)) @@ -1708,13 +1711,14 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety m.logReduction.ClearID(podFullName) return &kubecontainer.PodStatus{ - ID: uid, - Name: name, - Namespace: namespace, - IPs: podIPs, - SandboxStatuses: sandboxStatuses, - ContainerStatuses: containerStatuses, - TimeStamp: timestamp, + ID: uid, + Name: name, + Namespace: namespace, + IPs: podIPs, + SandboxStatuses: sandboxStatuses, + ContainerStatuses: containerStatuses, + ActiveContainerStatuses: activeContainerStatuses, + TimeStamp: timestamp, }, nil } diff --git a/test/e2e_node/container_lifecycle_test.go b/test/e2e_node/container_lifecycle_test.go index a3a7a205611..a10d70cdbeb 100644 --- a/test/e2e_node/container_lifecycle_test.go +++ b/test/e2e_node/container_lifecycle_test.go @@ -5417,7 +5417,7 @@ var _ = SIGDescribe(feature.SidecarContainers, framework.WithSerial(), "Containe f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged ginkgo.When("A node running restartable init containers reboots", func() { - ginkgo.It("should restart the containers in right order after the node reboot", func(ctx context.Context) { + ginkgo.It("should restart the containers in right order with the proper phase after the node reboot", func(ctx context.Context) { init1 := "init-1" restartableInit2 := "restartable-init-2" init3 := "init-3" @@ -5505,18 +5505,22 @@ var _ = SIGDescribe(feature.SidecarContainers, framework.WithSerial(), "Containe return kubeletHealthCheck(kubeletHealthCheckURL) }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet was expected to be healthy")) - ginkgo.By("Waiting for the pod to be re-initialized and run") + ginkgo.By("Waiting for the pod to re-initialize") err = e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "re-initialized", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { - if pod.Status.ContainerStatuses[0].RestartCount < 1 { + if pod.Status.InitContainerStatuses[0].RestartCount < 1 { return false, nil } - if pod.Status.Phase != v1.PodRunning { - return false, nil + if pod.Status.Phase != v1.PodPending { + return false, fmt.Errorf("pod should remain in the pending phase until it is re-initialized, but it is %q", pod.Status.Phase) } return true, nil }) framework.ExpectNoError(err) + ginkgo.By("Waiting for the pod to run after re-initialization") + err = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod) + framework.ExpectNoError(err) + ginkgo.By("Parsing results") pod, err = 
client.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)