Mirror of https://github.com/k3s-io/kubernetes.git (synced 2026-02-21 22:57:15 +00:00)
Fix startup probe worker termination for sidecar containers
Fixes a bug where startup probe workers terminate incorrectly for sidecar containers with restartPolicy=Always when the pod has restartPolicy=Never, causing main containers to remain stuck in the Initializing state.

Changes:
- Add a container-level restart policy check for init containers only
- Extract the complex boolean logic into a named variable for readability
- Refactor the test helper to use the existing newWorker() function
- Add comprehensive unit and e2e tests for both scenarios
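For context, here is a minimal, self-contained sketch of the decision this commit changes. It is illustrative only: shouldKeepProbing is a hypothetical helper written for this note, not the kubelet's actual code, but it mirrors the condition in doProbe below. A restartable init container (a sidecar, i.e. an init container with container-level restartPolicy: Always) is restarted by the kubelet regardless of the pod-level restartPolicy, so its startup probe worker must keep running even when the pod uses restartPolicy: Never.

    package main

    import (
    	"fmt"

    	v1 "k8s.io/api/core/v1"
    )

    // shouldKeepProbing is a simplified, hypothetical model of the decision the
    // fix changes: once the probed container has terminated, keep the probe
    // worker alive only if the container can come back. A restartable init
    // container (sidecar) restarts regardless of the pod-level restartPolicy,
    // so it must not be treated like a regular container of a Never pod.
    func shouldKeepProbing(pod *v1.Pod, container v1.Container, terminated, isInitContainer bool) bool {
    	if !terminated {
    		return true // container still running; keep probing
    	}
    	isRestartableInit := isInitContainer &&
    		container.RestartPolicy != nil &&
    		*container.RestartPolicy == v1.ContainerRestartPolicyAlways
    	return pod.Spec.RestartPolicy != v1.RestartPolicyNever || isRestartableInit
    }

    func main() {
    	always := v1.ContainerRestartPolicyAlways
    	sidecar := v1.Container{Name: "sidecar", RestartPolicy: &always}
    	pod := &v1.Pod{Spec: v1.PodSpec{RestartPolicy: v1.RestartPolicyNever}}

    	// Before the fix, the worker aborted here and the main containers
    	// stayed stuck in Initializing; with the fix the worker keeps probing.
    	fmt.Println(shouldKeepProbing(pod, sidecar, true, true)) // true
    }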
@@ -81,6 +81,16 @@ type worker struct {
	proberDurationUnknownMetricLabels metrics.Labels
}

// isInitContainer checks if the worker's container is in the pod's init containers
func (w *worker) isInitContainer() bool {
	for _, initContainer := range w.pod.Spec.InitContainers {
		if initContainer.Name == w.container.Name {
			return true
		}
	}
	return false
}

// Creates and starts a new probe worker.
func newWorker(
	m *manager,

@@ -251,9 +261,14 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
		if !w.containerID.IsEmpty() {
			w.resultsManager.Set(w.containerID, results.Failure, w.pod)
		}
+
+		isRestartableInitContainer := w.isInitContainer() &&
+			w.container.RestartPolicy != nil && *w.container.RestartPolicy == v1.ContainerRestartPolicyAlways
+
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil ||
-			w.pod.Spec.RestartPolicy != v1.RestartPolicyNever
+			w.pod.Spec.RestartPolicy != v1.RestartPolicyNever ||
+			isRestartableInitContainer
	}

	// Graceful shutdown of the pod.

@@ -37,6 +37,25 @@ import (
func init() {
}

// newTestWorkerWithRestartableInitContainer creates a test worker with an init container setup
func newTestWorkerWithRestartableInitContainer(m *manager, probeType probeType) *worker {
	pod := getTestPod()

	// Set up init container with restart policy
	initContainer := pod.Spec.Containers[0]
	initContainer.Name = testContainerName
	restartPolicy := v1.ContainerRestartPolicyAlways
	initContainer.RestartPolicy = &restartPolicy

	// Move container to init containers and add a regular container
	pod.Spec.InitContainers = []v1.Container{initContainer}
	pod.Spec.Containers = []v1.Container{{
		Name: "main-container",
	}}

	return newWorker(m, probeType, pod, initContainer)
}

func TestDoProbe(t *testing.T) {
	m := newTestManager()

@@ -529,6 +548,72 @@ func TestResultRunOnStartupCheckFailure(t *testing.T) {
	}
}

func TestDoProbe_TerminatedRestartableInitContainerWithRestartPolicyNever(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

	// Test restartable init container (sidecar) behavior
	w := newTestWorkerWithRestartableInitContainer(m, startup)

	// Set pod restart policy to Never
	w.pod.Spec.RestartPolicy = v1.RestartPolicyNever

	// Create terminated status for init container
	terminatedStatus := getTestRunningStatus()
	terminatedStatus.InitContainerStatuses = []v1.ContainerStatus{{
		Name:        testContainerName,
		ContainerID: "test://test_container_id",
		State: v1.ContainerState{
			Terminated: &v1.ContainerStateTerminated{
				StartedAt: metav1.Now(),
			},
		},
	}}
	terminatedStatus.ContainerStatuses[0].State.Running = nil

	m.statusManager.SetPodStatus(w.pod, terminatedStatus)

	// Test: Terminated restartable init container with restartPolicy=Always should continue probing
	// even when pod has restartPolicy=Never
	if !w.doProbe(ctx) {
		t.Error("Expected to continue probing for terminated restartable init container with pod restart policy Never")
	}

	// Verify result is set to Failure for terminated container
	expectResult(t, w, results.Failure, "restartable init container with pod restart policy Never")
}

func TestDoProbe_TerminatedContainerWithRestartPolicyNever(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

	// Test that regular containers (without RestartPolicy) still work as before
	w := newTestWorker(m, startup, v1.Probe{})

	// Regular container without explicit restart policy
	w.container.RestartPolicy = nil

	// Set pod restart policy to Never
	w.pod.Spec.RestartPolicy = v1.RestartPolicyNever

	// Create terminated status
	terminatedStatus := getTestRunningStatus()
	terminatedStatus.ContainerStatuses[0].State.Running = nil
	terminatedStatus.ContainerStatuses[0].State.Terminated = &v1.ContainerStateTerminated{
		StartedAt: metav1.Now(),
	}

	m.statusManager.SetPodStatus(w.pod, terminatedStatus)

	// Should stop probing (existing behavior preserved)
	if w.doProbe(ctx) {
		t.Error("Expected to stop probing for regular container with pod RestartPolicy=Never")
	}

	// Verify result is set to Failure for terminated container
	expectResult(t, w, results.Failure, "regular container with pod restart policy Never")
}

func TestLivenessProbeDisabledByStarted(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

@@ -5408,7 +5408,113 @@ var _ = SIGDescribe(feature.SidecarContainers, "Containers Lifecycle", func() {
				})
			})
		})

	})

	ginkgo.When("A restartable init container with startup probe fails initially", func() {
		ginkgo.It("should continue probing and allow regular container to start after the restartable init container recovers", func(ctx context.Context) {
			restartableInit := "buggy-restartable-init"
			regularContainer := "regular-container"

			restartPolicyAlways := v1.ContainerRestartPolicyAlways

			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: "restartable-init-startup-probe-fix",
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					InitContainers: []v1.Container{{
						Name:          restartableInit,
						Image:         busyboxImage,
						RestartPolicy: &restartPolicyAlways,
						Command: []string{"sh", "-c", `
							if [ ! -f /persistent/first_run_done ]; then
								echo 'First run: creating marker and exiting with 1'
								touch /persistent/first_run_done
								exit 1
							else
								echo 'Second run: marker found, running as sidecar'
								sleep 120
							fi`},
						StartupProbe: &v1.Probe{
							InitialDelaySeconds: 3,
							PeriodSeconds:       2,
							FailureThreshold:    10,
							ProbeHandler: v1.ProbeHandler{
								Exec: &v1.ExecAction{
									Command: []string{"/bin/true"},
								},
							},
						},
					}},
					Containers: []v1.Container{{
						Name:  regularContainer,
						Image: imageutils.GetPauseImageName(),
						StartupProbe: &v1.Probe{
							InitialDelaySeconds: 5,
							PeriodSeconds:       5,
							ProbeHandler: v1.ProbeHandler{
								Exec: &v1.ExecAction{
									Command: []string{"/bin/true"},
								},
							},
						},
					}},
				},
			}

			preparePod(pod)
			client := e2epod.NewPodClient(f)
			pod = client.Create(ctx, pod)

			ginkgo.By("Waiting for init container to fail and restart at least once")
			framework.ExpectNoError(
				e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name,
					"restartable init restarted", 90*time.Second,
					func(p *v1.Pod) (bool, error) {
						for _, st := range p.Status.InitContainerStatuses {
							if st.Name == restartableInit && st.RestartCount > 0 {
								framework.Logf("Init container %s has restarted %d times", restartableInit, st.RestartCount)
								return true, nil
							}
						}
						return false, nil
					}),
			)

			ginkgo.By("Waiting for init container to be running after restart")
			framework.ExpectNoError(
				e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name,
					"restartable init running", 90*time.Second,
					func(p *v1.Pod) (bool, error) {
						for _, st := range p.Status.InitContainerStatuses {
							if st.Name == restartableInit && st.State.Running != nil {
								framework.Logf("Init container %s is now running", restartableInit)
								return true, nil
							}
						}
						return false, nil
					}),
			)

			ginkgo.By("Waiting for regular container to start")
			framework.ExpectNoError(
				e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name,
					"regular container running", 120*time.Second,
					func(p *v1.Pod) (bool, error) {
						for _, st := range p.Status.ContainerStatuses {
							if st.Name == regularContainer && st.State.Running != nil {
								framework.Logf("Regular container %s is running", regularContainer)
								return true, nil
							}
						}
						return false, nil
					}),
			)
		})
	})

})

var _ = SIGDescribe(feature.SidecarContainers, framework.WithSerial(), "Containers Lifecycle", func() {