From aa9a9e86e18541abd1c08751910ab9be9d2f29e1 Mon Sep 17 00:00:00 2001
From: mochizuki875 <mzk875@gmail.com>
Date: Fri, 13 Oct 2023 02:53:08 +0000
Subject: [PATCH] Stop StartupProbe explicity when successThrethold is reached

---
 pkg/kubelet/prober/common_test.go |  4 +++
 pkg/kubelet/prober/worker.go      |  5 ++-
 pkg/kubelet/prober/worker_test.go | 53 ++++++++++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/pkg/kubelet/prober/common_test.go b/pkg/kubelet/prober/common_test.go
index d071392a1c3..be435cd1518 100644
--- a/pkg/kubelet/prober/common_test.go
+++ b/pkg/kubelet/prober/common_test.go
@@ -46,6 +46,10 @@ func getTestRunningStatus() v1.PodStatus {
 	return getTestRunningStatusWithStarted(true)
 }
 
+func getTestNotRunningStatus() v1.PodStatus {
+	return getTestRunningStatusWithStarted(false)
+}
+
 func getTestRunningStatusWithStarted(started bool) v1.PodStatus {
 	containerStatus := v1.ContainerStatus{
 		Name:        testContainerName,
diff --git a/pkg/kubelet/prober/worker.go b/pkg/kubelet/prober/worker.go
index ea2d7243387..5dd1ef271c7 100644
--- a/pkg/kubelet/prober/worker.go
+++ b/pkg/kubelet/prober/worker.go
@@ -316,11 +316,14 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
 
 	w.resultsManager.Set(w.containerID, result, w.pod)
 
-	if (w.probeType == liveness || w.probeType == startup) && result == results.Failure {
+	if (w.probeType == liveness && result == results.Failure) || w.probeType == startup {
 		// The container fails a liveness/startup check, it will need to be restarted.
 		// Stop probing until we see a new container ID. This is to reduce the
 		// chance of hitting #21751, where running `docker exec` when a
 		// container is being stopped may lead to corrupted container state.
+		// In addition, if the container succeeds a startup probe, we should stop probing
+		// until the container is restarted.
+		// This is to prevent extra Probe executions #117153.
 		w.onHold = true
 		w.resultRun = 0
 	}
diff --git a/pkg/kubelet/prober/worker_test.go b/pkg/kubelet/prober/worker_test.go
index 4d1c6d140c1..c576fc51431 100644
--- a/pkg/kubelet/prober/worker_test.go
+++ b/pkg/kubelet/prober/worker_test.go
@@ -268,6 +268,48 @@ func TestSuccessThreshold(t *testing.T) {
 	}
 }
 
+func TestStartupProbeSuccessThreshold(t *testing.T) {
+	ctx := context.Background()
+	m := newTestManager()
+	successThreshold := 1
+	failureThreshold := 3
+	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: int32(successThreshold), FailureThreshold: int32(failureThreshold)})
+	m.statusManager.SetPodStatus(w.pod, getTestNotRunningStatus())
+
+	m.prober.exec = fakeExecProber{probe.Success, nil}
+
+	for i := 0; i < successThreshold+1; i++ {
+		msg := fmt.Sprintf("%d success", successThreshold)
+		expectContinue(t, w, w.doProbe(ctx), msg)
+		expectResult(t, w, results.Success, msg)
+		expectResultRun(t, w, 0, msg)
+	}
+}
+
+func TestStartupProbeFailureThreshold(t *testing.T) {
+	ctx := context.Background()
+	m := newTestManager()
+	successThreshold := 1
+	failureThreshold := 3
+	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: int32(successThreshold), FailureThreshold: int32(failureThreshold)})
+	m.statusManager.SetPodStatus(w.pod, getTestNotRunningStatus())
+
+	m.prober.exec = fakeExecProber{probe.Failure, nil}
+
+	for i := 0; i < failureThreshold+1; i++ {
+		msg := fmt.Sprintf("%d failure", i+1)
+		expectContinue(t, w, w.doProbe(ctx), msg)
+		if i < failureThreshold-1 {
+			expectResult(t, w, results.Unknown, msg)
+			expectResultRun(t, w, i+1, msg)
+		} else {
+			msg := fmt.Sprintf("%d failure", failureThreshold)
+			expectResult(t, w, results.Failure, msg)
+			expectResultRun(t, w, 0, msg)
+		}
+	}
+}
+
 func TestCleanUp(t *testing.T) {
 	m := newTestManager()
 
@@ -315,6 +357,13 @@ func expectResult(t *testing.T, w *worker, expectedResult results.Result, msg st
 	}
 }
 
+func expectResultRun(t *testing.T, w *worker, expectedResultRun int, msg string) {
+	if w.resultRun != expectedResultRun {
+		t.Errorf("[%s - %s] Expected result to be %v, but was %v",
+			w.probeType, msg, expectedResultRun, w.resultRun)
+	}
+}
+
 func expectContinue(t *testing.T, w *worker, c bool, msg string) {
 	if !c {
 		t.Errorf("[%s - %s] Expected to continue, but did not", w.probeType, msg)
@@ -366,8 +415,10 @@ func TestOnHoldOnLivenessOrStartupCheckFailure(t *testing.T) {
 		msg = "hold lifted"
 		expectContinue(t, w, w.doProbe(ctx), msg)
 		expectResult(t, w, results.Success, msg)
-		if w.onHold {
+		if probeType == liveness && w.onHold {
 			t.Errorf("Prober should not be on hold anymore")
+		} else if probeType == startup && !w.onHold {
+			t.Errorf("Prober should be on hold due to %s check success", probeType)
 		}
 	}
 }