diff --git a/pkg/apis/core/v1/defaults_test.go b/pkg/apis/core/v1/defaults_test.go index 00e468f2d6b..4e0a9d93586 100644 --- a/pkg/apis/core/v1/defaults_test.go +++ b/pkg/apis/core/v1/defaults_test.go @@ -71,6 +71,12 @@ func TestWorkloadDefaults(t *testing.T) { ".Spec.Containers[0].ReadinessProbe.PeriodSeconds": `10`, ".Spec.Containers[0].ReadinessProbe.SuccessThreshold": `1`, ".Spec.Containers[0].ReadinessProbe.TimeoutSeconds": `1`, + ".Spec.Containers[0].StartupProbe.FailureThreshold": "3", + ".Spec.Containers[0].StartupProbe.Handler.HTTPGet.Path": `"/"`, + ".Spec.Containers[0].StartupProbe.Handler.HTTPGet.Scheme": `"HTTP"`, + ".Spec.Containers[0].StartupProbe.PeriodSeconds": "10", + ".Spec.Containers[0].StartupProbe.SuccessThreshold": "1", + ".Spec.Containers[0].StartupProbe.TimeoutSeconds": "1", ".Spec.Containers[0].TerminationMessagePath": `"/dev/termination-log"`, ".Spec.Containers[0].TerminationMessagePolicy": `"File"`, ".Spec.DNSPolicy": `"ClusterFirst"`, @@ -92,6 +98,12 @@ func TestWorkloadDefaults(t *testing.T) { ".Spec.EphemeralContainers[0].EphemeralContainerCommon.ReadinessProbe.PeriodSeconds": "10", ".Spec.EphemeralContainers[0].EphemeralContainerCommon.ReadinessProbe.SuccessThreshold": "1", ".Spec.EphemeralContainers[0].EphemeralContainerCommon.ReadinessProbe.TimeoutSeconds": "1", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.FailureThreshold": "3", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.Handler.HTTPGet.Path": `"/"`, + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.Handler.HTTPGet.Scheme": `"HTTP"`, + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.PeriodSeconds": "10", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.SuccessThreshold": "1", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.TimeoutSeconds": "1", ".Spec.InitContainers[0].Env[0].ValueFrom.FieldRef.APIVersion": `"v1"`, ".Spec.InitContainers[0].ImagePullPolicy": `"IfNotPresent"`, ".Spec.InitContainers[0].Lifecycle.PostStart.HTTPGet.Path": `"/"`, @@ -111,6 +123,12 @@ func TestWorkloadDefaults(t *testing.T) { ".Spec.InitContainers[0].ReadinessProbe.PeriodSeconds": `10`, ".Spec.InitContainers[0].ReadinessProbe.SuccessThreshold": `1`, ".Spec.InitContainers[0].ReadinessProbe.TimeoutSeconds": `1`, + ".Spec.InitContainers[0].StartupProbe.FailureThreshold": "3", + ".Spec.InitContainers[0].StartupProbe.Handler.HTTPGet.Path": `"/"`, + ".Spec.InitContainers[0].StartupProbe.Handler.HTTPGet.Scheme": `"HTTP"`, + ".Spec.InitContainers[0].StartupProbe.PeriodSeconds": "10", + ".Spec.InitContainers[0].StartupProbe.SuccessThreshold": "1", + ".Spec.InitContainers[0].StartupProbe.TimeoutSeconds": "1", ".Spec.InitContainers[0].TerminationMessagePath": `"/dev/termination-log"`, ".Spec.InitContainers[0].TerminationMessagePolicy": `"File"`, ".Spec.RestartPolicy": `"Always"`, @@ -174,6 +192,12 @@ func TestPodDefaults(t *testing.T) { ".Spec.Containers[0].ReadinessProbe.SuccessThreshold": `1`, ".Spec.Containers[0].ReadinessProbe.TimeoutSeconds": `1`, ".Spec.Containers[0].Resources.Requests": `{"":"0"}`, // this gets defaulted from the limits field + ".Spec.Containers[0].StartupProbe.FailureThreshold": "3", + ".Spec.Containers[0].StartupProbe.Handler.HTTPGet.Path": `"/"`, + ".Spec.Containers[0].StartupProbe.Handler.HTTPGet.Scheme": `"HTTP"`, + ".Spec.Containers[0].StartupProbe.PeriodSeconds": "10", + ".Spec.Containers[0].StartupProbe.SuccessThreshold": "1", + 
".Spec.Containers[0].StartupProbe.TimeoutSeconds": "1", ".Spec.Containers[0].TerminationMessagePath": `"/dev/termination-log"`, ".Spec.Containers[0].TerminationMessagePolicy": `"File"`, ".Spec.DNSPolicy": `"ClusterFirst"`, @@ -196,6 +220,12 @@ func TestPodDefaults(t *testing.T) { ".Spec.EphemeralContainers[0].EphemeralContainerCommon.ReadinessProbe.PeriodSeconds": "10", ".Spec.EphemeralContainers[0].EphemeralContainerCommon.ReadinessProbe.SuccessThreshold": "1", ".Spec.EphemeralContainers[0].EphemeralContainerCommon.ReadinessProbe.TimeoutSeconds": "1", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.FailureThreshold": "3", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.Handler.HTTPGet.Path": `"/"`, + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.Handler.HTTPGet.Scheme": `"HTTP"`, + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.PeriodSeconds": "10", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.SuccessThreshold": "1", + ".Spec.EphemeralContainers[0].EphemeralContainerCommon.StartupProbe.TimeoutSeconds": "1", ".Spec.InitContainers[0].Env[0].ValueFrom.FieldRef.APIVersion": `"v1"`, ".Spec.InitContainers[0].ImagePullPolicy": `"IfNotPresent"`, ".Spec.InitContainers[0].Lifecycle.PostStart.HTTPGet.Path": `"/"`, @@ -218,6 +248,12 @@ func TestPodDefaults(t *testing.T) { ".Spec.InitContainers[0].Resources.Requests": `{"":"0"}`, // this gets defaulted from the limits field ".Spec.InitContainers[0].TerminationMessagePath": `"/dev/termination-log"`, ".Spec.InitContainers[0].TerminationMessagePolicy": `"File"`, + ".Spec.InitContainers[0].StartupProbe.FailureThreshold": "3", + ".Spec.InitContainers[0].StartupProbe.Handler.HTTPGet.Path": `"/"`, + ".Spec.InitContainers[0].StartupProbe.Handler.HTTPGet.Scheme": `"HTTP"`, + ".Spec.InitContainers[0].StartupProbe.PeriodSeconds": "10", + ".Spec.InitContainers[0].StartupProbe.SuccessThreshold": "1", + ".Spec.InitContainers[0].StartupProbe.TimeoutSeconds": "1", ".Spec.RestartPolicy": `"Always"`, ".Spec.SchedulerName": `"default-scheduler"`, ".Spec.SecurityContext": `{}`, diff --git a/pkg/kubelet/prober/common_test.go b/pkg/kubelet/prober/common_test.go index 74cd10b4bff..8cec8bfc8eb 100644 --- a/pkg/kubelet/prober/common_test.go +++ b/pkg/kubelet/prober/common_test.go @@ -41,11 +41,16 @@ const ( var testContainerID = kubecontainer.ContainerID{Type: "test", ID: "cOnTaInEr_Id"} func getTestRunningStatus() v1.PodStatus { + return getTestRunningStatusWithStarted(true) +} + +func getTestRunningStatusWithStarted(started bool) v1.PodStatus { containerStatus := v1.ContainerStatus{ Name: testContainerName, ContainerID: testContainerID.String(), } containerStatus.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.Now()} + containerStatus.Started = &started podStatus := v1.PodStatus{ Phase: v1.PodRunning, ContainerStatuses: []v1.ContainerStatus{containerStatus}, @@ -93,6 +98,8 @@ func setTestProbe(pod *v1.Pod, probeType probeType, probeSpec v1.Probe) { pod.Spec.Containers[0].ReadinessProbe = &probeSpec case liveness: pod.Spec.Containers[0].LivenessProbe = &probeSpec + case startup: + pod.Spec.Containers[0].StartupProbe = &probeSpec } } diff --git a/pkg/kubelet/prober/prober_manager_test.go b/pkg/kubelet/prober/prober_manager_test.go index 7021c392597..eda3f503dbd 100644 --- a/pkg/kubelet/prober/prober_manager_test.go +++ b/pkg/kubelet/prober/prober_manager_test.go @@ -28,7 +28,10 @@ import ( "k8s.io/apimachinery/pkg/util/runtime" 
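For reference, the defaults_test expectations added earlier in this diff encode the standard probe defaulting in pkg/apis/core/v1: a startup probe that only specifies an HTTPGet port comes back from defaulting with the same filled-in fields as liveness and readiness probes. A hedged sketch of the resulting object (illustration only, not part of the patch; the 8080 port is a made-up user-supplied value):

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

// defaultedStartupProbe mirrors the values asserted in the tests above: Path,
// Scheme, FailureThreshold, PeriodSeconds, SuccessThreshold and TimeoutSeconds
// are all filled in by defaulting when only the HTTPGet port is set.
var defaultedStartupProbe = &v1.Probe{
	Handler: v1.Handler{
		HTTPGet: &v1.HTTPGetAction{
			Port:   intstr.FromInt(8080), // supplied by the user (hypothetical)
			Path:   "/",                  // defaulted
			Scheme: v1.URISchemeHTTP,     // defaulted
		},
	},
	FailureThreshold: 3,  // defaulted
	PeriodSeconds:    10, // defaulted
	SuccessThreshold: 1,  // defaulted
	TimeoutSeconds:   1,  // defaulted
}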
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/klog" + "k8s.io/kubernetes/pkg/features" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/probe" @@ -58,6 +61,8 @@ func TestAddRemovePods(t *testing.T) { Name: "no_probe1", }, { Name: "no_probe2", + }, { + Name: "no_probe3", }}, }, } @@ -68,20 +73,26 @@ func TestAddRemovePods(t *testing.T) { }, Spec: v1.PodSpec{ Containers: []v1.Container{{ - Name: "no_probe1", + Name: "probe1", }, { Name: "readiness", ReadinessProbe: defaultProbe, }, { - Name: "no_probe2", + Name: "probe2", }, { Name: "liveness", LivenessProbe: defaultProbe, + }, { + Name: "probe3", + }, { + Name: "startup", + StartupProbe: defaultProbe, }}, }, } m := newTestManager() + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.StartupProbe, true)() defer cleanup(t, m) if err := expectProbes(m, nil); err != nil { t.Error(err) @@ -98,6 +109,7 @@ func TestAddRemovePods(t *testing.T) { probePaths := []probeKey{ {"probe_pod", "readiness", readiness}, {"probe_pod", "liveness", liveness}, + {"probe_pod", "startup", startup}, } if err := expectProbes(m, probePaths); err != nil { t.Error(err) @@ -127,6 +139,7 @@ func TestAddRemovePods(t *testing.T) { func TestCleanupPods(t *testing.T) { m := newTestManager() + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.StartupProbe, true)() defer cleanup(t, m) podToCleanup := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -139,6 +152,9 @@ func TestCleanupPods(t *testing.T) { }, { Name: "prober2", LivenessProbe: defaultProbe, + }, { + Name: "prober3", + StartupProbe: defaultProbe, }}, }, } @@ -153,6 +169,9 @@ func TestCleanupPods(t *testing.T) { }, { Name: "prober2", LivenessProbe: defaultProbe, + }, { + Name: "prober3", + StartupProbe: defaultProbe, }}, }, } @@ -166,10 +185,12 @@ func TestCleanupPods(t *testing.T) { removedProbes := []probeKey{ {"pod_cleanup", "prober1", readiness}, {"pod_cleanup", "prober2", liveness}, + {"pod_cleanup", "prober3", startup}, } expectedProbes := []probeKey{ {"pod_keep", "prober1", readiness}, {"pod_keep", "prober2", liveness}, + {"pod_keep", "prober3", startup}, } if err := waitForWorkerExit(m, removedProbes); err != nil { t.Fatal(err) @@ -181,6 +202,7 @@ func TestCleanupPods(t *testing.T) { func TestCleanupRepeated(t *testing.T) { m := newTestManager() + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.StartupProbe, true)() defer cleanup(t, m) podTemplate := v1.Pod{ Spec: v1.PodSpec{ @@ -188,6 +210,7 @@ func TestCleanupRepeated(t *testing.T) { Name: "prober1", ReadinessProbe: defaultProbe, LivenessProbe: defaultProbe, + StartupProbe: defaultProbe, }}, }, } diff --git a/pkg/kubelet/prober/prober_test.go b/pkg/kubelet/prober/prober_test.go index 90a4d6c0415..dab63587059 100644 --- a/pkg/kubelet/prober/prober_test.go +++ b/pkg/kubelet/prober/prober_test.go @@ -280,7 +280,7 @@ func TestProbe(t *testing.T) { } for i, test := range tests { - for _, probeType := range [...]probeType{liveness, readiness} { + for _, probeType := range [...]probeType{liveness, readiness, startup} { prober := &prober{ refManager: kubecontainer.NewRefManager(), recorder: &record.FakeRecorder{}, @@ -292,6 +292,8 @@ func TestProbe(t *testing.T) { testContainer.LivenessProbe = test.probe case readiness: 
testContainer.ReadinessProbe = test.probe + case startup: + testContainer.StartupProbe = test.probe } if test.execError { prober.exec = fakeExecProber{test.execResult, errors.New("exec error")} diff --git a/pkg/kubelet/prober/worker_test.go b/pkg/kubelet/prober/worker_test.go index d7473a35bc6..0cd6de98df0 100644 --- a/pkg/kubelet/prober/worker_test.go +++ b/pkg/kubelet/prober/worker_test.go @@ -43,64 +43,64 @@ func init() { func TestDoProbe(t *testing.T) { m := newTestManager() - // Test statuses. - runningStatus := getTestRunningStatus() - pendingStatus := getTestRunningStatus() - pendingStatus.ContainerStatuses[0].State.Running = nil - terminatedStatus := getTestRunningStatus() - terminatedStatus.ContainerStatuses[0].State.Running = nil - terminatedStatus.ContainerStatuses[0].State.Terminated = &v1.ContainerStateTerminated{ - StartedAt: metav1.Now(), - } - otherStatus := getTestRunningStatus() - otherStatus.ContainerStatuses[0].Name = "otherContainer" - failedStatus := getTestRunningStatus() - failedStatus.Phase = v1.PodFailed + for _, probeType := range [...]probeType{liveness, readiness, startup} { + // Test statuses. + runningStatus := getTestRunningStatusWithStarted(probeType != startup) + pendingStatus := getTestRunningStatusWithStarted(probeType != startup) + pendingStatus.ContainerStatuses[0].State.Running = nil + terminatedStatus := getTestRunningStatusWithStarted(probeType != startup) + terminatedStatus.ContainerStatuses[0].State.Running = nil + terminatedStatus.ContainerStatuses[0].State.Terminated = &v1.ContainerStateTerminated{ + StartedAt: metav1.Now(), + } + otherStatus := getTestRunningStatusWithStarted(probeType != startup) + otherStatus.ContainerStatuses[0].Name = "otherContainer" + failedStatus := getTestRunningStatusWithStarted(probeType != startup) + failedStatus.Phase = v1.PodFailed - tests := []struct { - probe v1.Probe - podStatus *v1.PodStatus - expectContinue bool - expectSet bool - expectedResult results.Result - }{ - { // No status. - expectContinue: true, - }, - { // Pod failed - podStatus: &failedStatus, - }, - { // No container status - podStatus: &otherStatus, - expectContinue: true, - }, - { // Container waiting - podStatus: &pendingStatus, - expectContinue: true, - expectSet: true, - }, - { // Container terminated - podStatus: &terminatedStatus, - expectSet: true, - }, - { // Probe successful. - podStatus: &runningStatus, - expectContinue: true, - expectSet: true, - expectedResult: results.Success, - }, - { // Initial delay passed - podStatus: &runningStatus, - probe: v1.Probe{ - InitialDelaySeconds: -100, + tests := []struct { + probe v1.Probe + podStatus *v1.PodStatus + expectContinue bool + expectSet bool + expectedResult results.Result + }{ + { // No status. + expectContinue: true, }, - expectContinue: true, - expectSet: true, - expectedResult: results.Success, - }, - } + { // Pod failed + podStatus: &failedStatus, + }, + { // No container status + podStatus: &otherStatus, + expectContinue: true, + }, + { // Container waiting + podStatus: &pendingStatus, + expectContinue: true, + expectSet: true, + }, + { // Container terminated + podStatus: &terminatedStatus, + expectSet: true, + }, + { // Probe successful. 
+ podStatus: &runningStatus, + expectContinue: true, + expectSet: true, + expectedResult: results.Success, + }, + { // Initial delay passed + podStatus: &runningStatus, + probe: v1.Probe{ + InitialDelaySeconds: -100, + }, + expectContinue: true, + expectSet: true, + expectedResult: results.Success, + }, + } - for _, probeType := range [...]probeType{liveness, readiness} { for i, test := range tests { w := newTestWorker(m, probeType, test.probe) if test.podStatus != nil { @@ -127,17 +127,18 @@ func TestDoProbe(t *testing.T) { func TestInitialDelay(t *testing.T) { m := newTestManager() - for _, probeType := range [...]probeType{liveness, readiness} { + for _, probeType := range [...]probeType{liveness, readiness, startup} { w := newTestWorker(m, probeType, v1.Probe{ InitialDelaySeconds: 10, }) - m.statusManager.SetPodStatus(w.pod, getTestRunningStatus()) + m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup)) expectContinue(t, w, w.doProbe(), "during initial delay") + // Default value depends on probe, true for liveness, otherwise false. expectResult(t, w, results.Result(probeType == liveness), "during initial delay") // 100 seconds later... - laterStatus := getTestRunningStatus() + laterStatus := getTestRunningStatusWithStarted(probeType != startup) laterStatus.ContainerStatuses[0].State.Running.StartedAt.Time = time.Now().Add(-100 * time.Second) m.statusManager.SetPodStatus(w.pod, laterStatus) @@ -219,10 +220,10 @@ func TestSuccessThreshold(t *testing.T) { func TestCleanUp(t *testing.T) { m := newTestManager() - for _, probeType := range [...]probeType{liveness, readiness} { + for _, probeType := range [...]probeType{liveness, readiness, startup} { key := probeKey{testPodUID, testContainerName, probeType} w := newTestWorker(m, probeType, v1.Probe{}) - m.statusManager.SetPodStatus(w.pod, getTestRunningStatus()) + m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup)) go w.run() m.workers[key] = w @@ -297,6 +298,8 @@ func resultsManager(m *manager, probeType probeType) results.Manager { return m.readinessManager case liveness: return m.livenessManager + case startup: + return m.startupManager } panic(fmt.Errorf("Unhandled case: %v", probeType)) } @@ -307,38 +310,41 @@ func (p crashingExecProber) Probe(_ exec.Cmd) (probe.Result, string, error) { panic("Intentional Probe crash.") } -func TestOnHoldOnLivenessCheckFailure(t *testing.T) { +func TestOnHoldOnLivenessOrStartupCheckFailure(t *testing.T) { m := newTestManager() - w := newTestWorker(m, liveness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1}) - status := getTestRunningStatus() - m.statusManager.SetPodStatus(w.pod, getTestRunningStatus()) - // First probe should fail. - m.prober.exec = fakeExecProber{probe.Failure, nil} - msg := "first probe" - expectContinue(t, w, w.doProbe(), msg) - expectResult(t, w, results.Failure, msg) - if !w.onHold { - t.Errorf("Prober should be on hold due to liveness check failure") - } - // Set fakeExecProber to return success. However, the result will remain - // failure because the worker is on hold and won't probe. 
- m.prober.exec = fakeExecProber{probe.Success, nil} - msg = "while on hold" - expectContinue(t, w, w.doProbe(), msg) - expectResult(t, w, results.Failure, msg) - if !w.onHold { - t.Errorf("Prober should be on hold due to liveness check failure") - } + for _, probeType := range [...]probeType{liveness, startup} { + w := newTestWorker(m, probeType, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1}) + status := getTestRunningStatusWithStarted(probeType != startup) + m.statusManager.SetPodStatus(w.pod, status) - // Set a new container ID to lift the hold. The next probe will succeed. - status.ContainerStatuses[0].ContainerID = "test://newCont_ID" - m.statusManager.SetPodStatus(w.pod, status) - msg = "hold lifted" - expectContinue(t, w, w.doProbe(), msg) - expectResult(t, w, results.Success, msg) - if w.onHold { - t.Errorf("Prober should not be on hold anymore") + // First probe should fail. + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg := "first probe" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) + if !w.onHold { + t.Errorf("Prober should be on hold due to %s check failure", probeType) + } + // Set fakeExecProber to return success. However, the result will remain + // failure because the worker is on hold and won't probe. + m.prober.exec = fakeExecProber{probe.Success, nil} + msg = "while on hold" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) + if !w.onHold { + t.Errorf("Prober should be on hold due to %s check failure", probeType) + } + + // Set a new container ID to lift the hold. The next probe will succeed. + status.ContainerStatuses[0].ContainerID = "test://newCont_ID" + m.statusManager.SetPodStatus(w.pod, status) + msg = "hold lifted" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Success, msg) + if w.onHold { + t.Errorf("Prober should not be on hold anymore") + } } } @@ -382,3 +388,79 @@ func TestResultRunOnLivenessCheckFailure(t *testing.T) { t.Errorf("Prober resultRun should be reset to 0") } } + +func TestResultRunOnStartupCheckFailure(t *testing.T) { + m := newTestManager() + w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3}) + m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false)) + + // Below FailureThreshold leaves probe state unchanged + // which is failed for startup at first. + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg := "probe failure, result failure" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) + if w.resultRun != 1 { + t.Errorf("Prober resultRun should be 1") + } + + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg = "2nd probe failure, result failure" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) + if w.resultRun != 2 { + t.Errorf("Prober resultRun should be 2") + } + + // Exceeding FailureThreshold should cause resultRun to + // reset to 0 so that the probe on the restarted pod + // also gets FailureThreshold attempts to succeed. 
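+	// (This mirrors TestResultRunOnLivenessCheckFailure above: the threshold and
+	// resultRun bookkeeping in the worker is shared by all probe types, so the
+	// startup variant resets the same way.)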
+ m.prober.exec = fakeExecProber{probe.Failure, nil} + msg = "3rd probe failure, result failure" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) + if w.resultRun != 0 { + t.Errorf("Prober resultRun should be reset to 0") + } +} + +func TestLivenessProbeDisabledByStarted(t *testing.T) { + m := newTestManager() + w := newTestWorker(m, liveness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1}) + m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false)) + // livenessProbe fails, but is disabled + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg := "Not started, probe failure, result success" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Success, msg) + // setting started state + m.statusManager.SetContainerStartup(w.pod.UID, w.containerID, true) + // livenessProbe fails + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg = "Started, probe failure, result failure" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) +} + +func TestStartupProbeDisabledByStarted(t *testing.T) { + m := newTestManager() + w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 2}) + m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false)) + // startupProbe fails + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg := "Not started, probe failure, result failure" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Failure, msg) + // startupProbe succeeds + m.prober.exec = fakeExecProber{probe.Success, nil} + msg = "Started, probe success, result success" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Success, msg) + // setting started state + m.statusManager.SetContainerStartup(w.pod.UID, w.containerID, true) + // startupProbe fails, but is disabled + m.prober.exec = fakeExecProber{probe.Failure, nil} + msg = "Started, probe failure, result success" + expectContinue(t, w, w.doProbe(), msg) + expectResult(t, w, results.Success, msg) +} diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go index ab3bd2c760b..a616b1368e2 100644 --- a/pkg/kubelet/status/status_manager_test.go +++ b/pkg/kubelet/status/status_manager_test.go @@ -679,6 +679,90 @@ func TestSetContainerReadiness(t *testing.T) { verifyReadiness("ignore non-existent", &status, true, true, true) } +func TestSetContainerStartup(t *testing.T) { + cID1 := kubecontainer.ContainerID{Type: "test", ID: "1"} + cID2 := kubecontainer.ContainerID{Type: "test", ID: "2"} + containerStatuses := []v1.ContainerStatus{ + { + Name: "c1", + ContainerID: cID1.String(), + Ready: false, + }, { + Name: "c2", + ContainerID: cID2.String(), + Ready: false, + }, + } + status := v1.PodStatus{ + ContainerStatuses: containerStatuses, + Conditions: []v1.PodCondition{{ + Type: v1.PodReady, + Status: v1.ConditionFalse, + }}, + } + pod := getTestPod() + pod.Spec.Containers = []v1.Container{{Name: "c1"}, {Name: "c2"}} + + // Verify expected startup of containers & pod. 
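+	// Note: only the per-container Started pointers are checked below; there is
+	// no pod-level startup condition in PodStatus, and the podStarted argument
+	// is currently unused (the signature simply parallels verifyReadiness above).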
+ verifyStartup := func(step string, status *v1.PodStatus, c1Started, c2Started, podStarted bool) { + for _, c := range status.ContainerStatuses { + switch c.ContainerID { + case cID1.String(): + if (c.Started != nil && *c.Started) != c1Started { + t.Errorf("[%s] Expected startup of c1 to be %v but was %v", step, c1Started, c.Started) + } + case cID2.String(): + if (c.Started != nil && *c.Started) != c2Started { + t.Errorf("[%s] Expected startup of c2 to be %v but was %v", step, c2Started, c.Started) + } + default: + t.Fatalf("[%s] Unexpected container: %+v", step, c) + } + } + } + + m := newTestManager(&fake.Clientset{}) + // Add test pod because the container spec has been changed. + m.podManager.AddPod(pod) + + t.Log("Setting startup before status should fail.") + m.SetContainerStartup(pod.UID, cID1, true) + verifyUpdates(t, m, 0) + if status, ok := m.GetPodStatus(pod.UID); ok { + t.Errorf("Unexpected PodStatus: %+v", status) + } + + t.Log("Setting initial status.") + m.SetPodStatus(pod, status) + verifyUpdates(t, m, 1) + status = expectPodStatus(t, m, pod) + verifyStartup("initial", &status, false, false, false) + + t.Log("Setting unchanged startup should do nothing.") + m.SetContainerStartup(pod.UID, cID1, false) + verifyUpdates(t, m, 1) + status = expectPodStatus(t, m, pod) + verifyStartup("unchanged", &status, false, false, false) + + t.Log("Setting container startup should generate update but not pod startup.") + m.SetContainerStartup(pod.UID, cID1, true) + verifyUpdates(t, m, 1) // Started = nil to false + status = expectPodStatus(t, m, pod) + verifyStartup("c1 ready", &status, true, false, false) + + t.Log("Setting both containers to ready should update pod startup.") + m.SetContainerStartup(pod.UID, cID2, true) + verifyUpdates(t, m, 1) + status = expectPodStatus(t, m, pod) + verifyStartup("all ready", &status, true, true, true) + + t.Log("Setting non-existent container startup should fail.") + m.SetContainerStartup(pod.UID, kubecontainer.ContainerID{Type: "test", ID: "foo"}, true) + verifyUpdates(t, m, 0) + status = expectPodStatus(t, m, pod) + verifyStartup("ignore non-existent", &status, true, true, true) +} + func TestSyncBatchCleanupVersions(t *testing.T) { m := newTestManager(&fake.Clientset{}) testPod := getTestPod() diff --git a/test/e2e/common/container_probe.go b/test/e2e/common/container_probe.go index fea24588cd8..e4533054604 100644 --- a/test/e2e/common/container_probe.go +++ b/test/e2e/common/container_probe.go @@ -71,9 +71,9 @@ var _ = framework.KubeDescribe("Probing container", func() { // We assume the pod became ready when the container became ready. This // is true for a single container pod. 
- readyTime, err := getTransitionTimeForReadyCondition(p) + readyTime, err := GetTransitionTimeForReadyCondition(p) framework.ExpectNoError(err) - startedTime, err := getContainerStartedTime(p, containerName) + startedTime, err := GetContainerStartedTime(p, containerName) framework.ExpectNoError(err) e2elog.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime) @@ -125,7 +125,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := busyBoxPodSpec(nil, livenessProbe, cmd) - runLivenessTest(f, pod, 1, defaultObservationTimeout) + RunLivenessTest(f, pod, 1, defaultObservationTimeout) }) /* @@ -141,7 +141,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := busyBoxPodSpec(nil, livenessProbe, cmd) - runLivenessTest(f, pod, 0, defaultObservationTimeout) + RunLivenessTest(f, pod, 0, defaultObservationTimeout) }) /* @@ -156,7 +156,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := livenessPodSpec(nil, livenessProbe) - runLivenessTest(f, pod, 1, defaultObservationTimeout) + RunLivenessTest(f, pod, 1, defaultObservationTimeout) }) /* @@ -171,7 +171,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := livenessPodSpec(nil, livenessProbe) - runLivenessTest(f, pod, 0, defaultObservationTimeout) + RunLivenessTest(f, pod, 0, defaultObservationTimeout) }) /* @@ -186,7 +186,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := livenessPodSpec(nil, livenessProbe) - runLivenessTest(f, pod, 5, time.Minute*5) + RunLivenessTest(f, pod, 5, time.Minute*5) }) /* @@ -202,7 +202,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 5, // to accommodate nodes which are slow in bringing up containers. } pod := testWebServerPodSpec(nil, livenessProbe, "test-webserver", 80) - runLivenessTest(f, pod, 0, defaultObservationTimeout) + RunLivenessTest(f, pod, 0, defaultObservationTimeout) }) /* @@ -221,7 +221,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := busyBoxPodSpec(nil, livenessProbe, cmd) - runLivenessTest(f, pod, 1, defaultObservationTimeout) + RunLivenessTest(f, pod, 1, defaultObservationTimeout) }) /* @@ -236,7 +236,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := livenessPodSpec(nil, livenessProbe) - runLivenessTest(f, pod, 1, defaultObservationTimeout) + RunLivenessTest(f, pod, 1, defaultObservationTimeout) }) /* @@ -251,7 +251,7 @@ var _ = framework.KubeDescribe("Probing container", func() { FailureThreshold: 1, } pod := livenessPodSpec(nil, livenessProbe) - runLivenessTest(f, pod, 0, defaultObservationTimeout) + RunLivenessTest(f, pod, 0, defaultObservationTimeout) // Expect an event of type "ProbeWarning". 
expectedEvent := fields.Set{ "involvedObject.kind": "Pod", @@ -264,7 +264,7 @@ var _ = framework.KubeDescribe("Probing container", func() { }) }) -func getContainerStartedTime(p *v1.Pod, containerName string) (time.Time, error) { +func GetContainerStartedTime(p *v1.Pod, containerName string) (time.Time, error) { for _, status := range p.Status.ContainerStatuses { if status.Name != containerName { continue @@ -277,7 +277,7 @@ func getContainerStartedTime(p *v1.Pod, containerName string) (time.Time, error) return time.Time{}, fmt.Errorf("cannot find container named %q", containerName) } -func getTransitionTimeForReadyCondition(p *v1.Pod) (time.Time, error) { +func GetTransitionTimeForReadyCondition(p *v1.Pod) (time.Time, error) { for _, cond := range p.Status.Conditions { if cond.Type == v1.PodReady { return cond.LastTransitionTime.Time, nil @@ -404,7 +404,7 @@ func (b webserverProbeBuilder) build() *v1.Probe { return probe } -func runLivenessTest(f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration) { +func RunLivenessTest(f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration) { podClient := f.PodClient() ns := f.Namespace.Name gomega.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty()) diff --git a/test/e2e_node/startup_probe_test.go b/test/e2e_node/startup_probe_test.go new file mode 100644 index 00000000000..d7b50c2be80 --- /dev/null +++ b/test/e2e_node/startup_probe_test.go @@ -0,0 +1,212 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_node + +import ( + "time" + + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/kubernetes/pkg/features" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/test/e2e/common" + "k8s.io/kubernetes/test/e2e/framework" + e2elog "k8s.io/kubernetes/test/e2e/framework/log" + testutils "k8s.io/kubernetes/test/utils" + imageutils "k8s.io/kubernetes/test/utils/image" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" +) + +const ( + defaultObservationTimeout = time.Minute * 4 +) + +var _ = framework.KubeDescribe("StartupProbe [Serial] [Disruptive] [NodeFeature:StartupProbe]", func() { + f := framework.NewDefaultFramework("critical-pod-test") + var podClient *framework.PodClient + + /* + These tests are located here as they require tempSetCurrentKubeletConfig to enable the feature gate for startupProbe. + Once the feature gate has been removed, these tests should come back to test/e2e/common/container_probe.go. 
+ */ + ginkgo.Context("when a container has a startup probe", func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + if initialConfig.FeatureGates == nil { + initialConfig.FeatureGates = make(map[string]bool) + } + initialConfig.FeatureGates[string(features.StartupProbe)] = true + }) + + /* + Release : v1.16 + Testname: Pod liveness probe, using local file, delayed by startup probe + Description: A Pod is created with liveness probe that uses ‘exec’ command to cat the non-existent /tmp/health file. Liveness probe MUST NOT fail until startup probe expires. + */ + framework.ConformanceIt("should *not* be restarted with a exec \"cat /tmp/health\" because startup probe delays it [NodeConformance]", func() { + cmd := []string{"/bin/sh", "-c", "sleep 600"} + livenessProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/health"}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 1, + } + startupProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/health"}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 60, + } + pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) + common.RunLivenessTest(f, pod, 0, defaultObservationTimeout) + }) + + /* + Release : v1.16 + Testname: Pod liveness probe, using local file, delayed by startup probe + Description: A Pod is created with liveness probe that uses ‘exec’ command to cat the non-existent /tmp/health file. Liveness probe MUST fail after startup probe expires. The Pod MUST now be killed and restarted incrementing restart count to 1. + */ + framework.ConformanceIt("should be restarted with a exec \"cat /tmp/health\" because startup probe does not delay it long enough [NodeConformance]", func() { + cmd := []string{"/bin/sh", "-c", "sleep 600"} + livenessProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/health"}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 1, + } + startupProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/health"}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 3, + } + pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) + common.RunLivenessTest(f, pod, 1, defaultObservationTimeout) + }) + + /* + Release : v1.16 + Testname: Pod liveness probe, using local file, startup finished restart + Description: A Pod is created with liveness probe that uses ‘exec’ command to cat /temp/health file. The Container is started by creating /tmp/startup after 10 seconds, triggering liveness probe to fail. The Pod MUST now be killed and restarted incrementing restart count to 1. 
+ */ + framework.ConformanceIt("should be restarted with a exec \"cat /tmp/health\" after startup probe succeeds it [NodeConformance]", func() { + cmd := []string{"/bin/sh", "-c", "sleep 10; echo ok >/tmp/startup; sleep 600"} + livenessProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/health"}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 1, + } + startupProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/startup"}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 60, + } + pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) + common.RunLivenessTest(f, pod, 1, defaultObservationTimeout) + }) + + /* + Release : v1.16 + Testname: Pod readiness probe, delayed by startup probe + Description: A Pod is created with startup and readiness probes. The Container is started by creating /tmp/startup after 45 seconds, delaying the ready state by this amount of time. This is similar to the "Pod readiness probe, with initial delay" test. + */ + framework.ConformanceIt("should not be ready until startupProbe succeeds [NodeConformance]", func() { + cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 45; echo ok >/tmp/startup; sleep 600"} + readinessProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/health"}, + }, + }, + InitialDelaySeconds: 0, + } + startupProbe := &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", "/tmp/startup"}, + }, + }, + InitialDelaySeconds: 0, + FailureThreshold: 60, + } + p := podClient.Create(startupPodSpec(startupProbe, readinessProbe, nil, cmd)) + + p, err := podClient.Get(p.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + f.WaitForPodReady(p.Name) + isReady, err := testutils.PodRunningReady(p) + framework.ExpectNoError(err) + gomega.Expect(isReady).To(gomega.BeTrue(), "pod should be ready") + + // We assume the pod became ready when the container became ready. This + // is true for a single container pod. + readyTime, err := common.GetTransitionTimeForReadyCondition(p) + framework.ExpectNoError(err) + startedTime, err := common.GetContainerStartedTime(p, "busybox") + framework.ExpectNoError(err) + + e2elog.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime) + if readyTime.Sub(startedTime) < 40*time.Second { + e2elog.Failf("Pod became ready before startupProbe succeeded") + } + }) + }) +}) + +func startupPodSpec(startupProbe, readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "startup-" + string(uuid.NewUUID()), + Labels: map[string]string{"test": "startup"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "busybox", + Image: imageutils.GetE2EImage(imageutils.BusyBox), + Command: cmd, + LivenessProbe: livenessProbe, + ReadinessProbe: readinessProbe, + StartupProbe: startupProbe, + }, + }, + }, + } +}
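The exported e2e helpers (common.RunLivenessTest, common.GetContainerStartedTime, common.GetTransitionTimeForReadyCondition) and the startupPodSpec builder above are meant to be combined as in this hedged sketch; the function name is illustrative only and assumes it sits in the same e2e_node package so that startupPodSpec and defaultObservationTimeout resolve:

package e2e_node

import (
	"k8s.io/api/core/v1"
	"k8s.io/kubernetes/test/e2e/common"
	"k8s.io/kubernetes/test/e2e/framework"
)

// expectStartupHoldsOffLiveness builds a pod whose always-failing liveness
// probe is held off by a long startup probe, then asserts zero restarts within
// the observation window, mirroring the first spec in this file.
func expectStartupHoldsOffLiveness(f *framework.Framework) {
	cmd := []string{"/bin/sh", "-c", "sleep 600"}
	livenessProbe := &v1.Probe{
		Handler:          v1.Handler{Exec: &v1.ExecAction{Command: []string{"cat", "/tmp/health"}}},
		FailureThreshold: 1,
	}
	startupProbe := &v1.Probe{
		Handler:          v1.Handler{Exec: &v1.ExecAction{Command: []string{"cat", "/tmp/health"}}},
		FailureThreshold: 60,
	}
	pod := startupPodSpec(startupProbe, nil /* no readiness probe */, livenessProbe, cmd)
	common.RunLivenessTest(f, pod, 0 /* expected restarts */, defaultObservationTimeout)
}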