Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-09-14 21:53:52 +00:00)
Merge pull request #121206 from mochizuki875/fix_startup_probe_117153
Stop StartupProbe explicitly when successThreshold is reached
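This change makes the kubelet prober worker put itself on hold once a startup probe result has been published: after the worker records a startup probe result (success when successThreshold is reached, or failure when failureThreshold is reached), it stops probing until the container is restarted, which prevents the extra probe executions reported in #117153. Previously, only a failing liveness or startup check triggered the hold. The hunks below appear to touch the prober worker and its tests: the condition change in doProbe, a new getTestNotRunningStatus test helper, two new threshold tests (TestStartupProbeSuccessThreshold and TestStartupProbeFailureThreshold), and an updated expectation in TestOnHoldOnLivenessOrStartupCheckFailure.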
@@ -46,6 +46,10 @@ func getTestRunningStatus() v1.PodStatus {
 	return getTestRunningStatusWithStarted(true)
 }
 
+func getTestNotRunningStatus() v1.PodStatus {
+	return getTestRunningStatusWithStarted(false)
+}
+
 func getTestRunningStatusWithStarted(started bool) v1.PodStatus {
 	containerStatus := v1.ContainerStatus{
 		Name: testContainerName,
@@ -321,11 +321,14 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
 
 	w.resultsManager.Set(w.containerID, result, w.pod)
 
-	if (w.probeType == liveness || w.probeType == startup) && result == results.Failure {
+	if (w.probeType == liveness && result == results.Failure) || w.probeType == startup {
 		// The container fails a liveness/startup check, it will need to be restarted.
 		// Stop probing until we see a new container ID. This is to reduce the
 		// chance of hitting #21751, where running `docker exec` when a
 		// container is being stopped may lead to corrupted container state.
+		// In addition, if the threshold for each result of a startup probe is exceeded, we should stop probing
+		// until the container is restarted.
+		// This is to prevent extra Probe executions #117153.
 		w.onHold = true
 		w.resultRun = 0
 	}
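To make the effect of the changed condition concrete, here is a minimal, self-contained sketch. It is not the real kubelet worker: the trimmed-down worker struct and the publish helper are invented for illustration, and publish stands in for the branch of doProbe shown above, which runs only after a result has been recorded (i.e. after successThreshold or failureThreshold was reached).

package main

import "fmt"

type probeType int

const (
	liveness probeType = iota
	startup
)

type result int

const (
	success result = iota
	failure
)

// worker models only the fields needed to illustrate the hold logic.
type worker struct {
	probeType probeType
	onHold    bool
	resultRun int
}

// publish mirrors the changed branch in doProbe: a liveness worker goes on
// hold only when a failure result is published, while a startup worker goes
// on hold for any published result (success or failure).
func (w *worker) publish(r result) {
	if (w.probeType == liveness && r == failure) || w.probeType == startup {
		// Stop probing until a new container ID is seen (container restart).
		w.onHold = true
		w.resultRun = 0
	}
}

func main() {
	s := &worker{probeType: startup}
	s.publish(success)
	fmt.Println("startup worker on hold after success:", s.onHold) // true: no extra startup probes

	l := &worker{probeType: liveness}
	l.publish(success)
	fmt.Println("liveness worker on hold after success:", l.onHold) // false: liveness keeps probing
}

With the old condition, (liveness || startup) && failure, the startup worker would not be put on hold after a successful result and would keep running probes every period even though the container had already been marked as started.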
@@ -268,6 +268,91 @@ func TestSuccessThreshold(t *testing.T) {
 	}
 }
 
+func TestStartupProbeSuccessThreshold(t *testing.T) {
+	ctx := context.Background()
+	m := newTestManager()
+	successThreshold := 1
+	failureThreshold := 3
+	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: int32(successThreshold), FailureThreshold: int32(failureThreshold)})
+	m.statusManager.SetPodStatus(w.pod, getTestNotRunningStatus())
+	m.prober.exec = fakeExecProber{probe.Success, nil}
+
+	for i := 0; i < successThreshold+1; i++ {
+		if i < successThreshold {
+			// Probe should not be on hold and will continue to be executed
+			// until successThreshold is met
+			if w.onHold {
+				t.Errorf("Prober should not be on hold")
+			}
+			msg := fmt.Sprintf("%d success", i+1)
+			expectContinue(t, w, w.doProbe(ctx), msg)
+			expectResult(t, w, results.Success, msg)
+		} else {
+			// Probe should be on hold and will not be executed anymore
+			// when successThreshold is met
+			if !w.onHold {
+				t.Errorf("Prober should be on hold because successThreshold is exceeded")
+			}
+			// Meeting or exceeding successThreshold should cause resultRun to reset to 0
+			if w.resultRun != 0 {
+				t.Errorf("Prober resultRun should be 0, but %d", w.resultRun)
+			}
+		}
+	}
+}
+
+func TestStartupProbeFailureThreshold(t *testing.T) {
+	ctx := context.Background()
+	m := newTestManager()
+	successThreshold := 1
+	failureThreshold := 3
+	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: int32(successThreshold), FailureThreshold: int32(failureThreshold)})
+	m.statusManager.SetPodStatus(w.pod, getTestNotRunningStatus())
+	m.prober.exec = fakeExecProber{probe.Failure, nil}
+
+	for i := 0; i < failureThreshold+1; i++ {
+		if i < failureThreshold {
+			// Probe should not be on hold and will continue to be executed
+			// until failureThreshold is met
+			if w.onHold {
+				t.Errorf("Prober should not be on hold")
+			}
+			msg := fmt.Sprintf("%d failure", i+1)
+			expectContinue(t, w, w.doProbe(ctx), msg)
+			switch i {
+			case 0, 1:
+				// At this point, the expected result is Unknown
+				// because w.resultsManager.Set() will be called after FailureThreshold is reached
+				expectResult(t, w, results.Unknown, msg)
+				// resultRun should be incremented until failureThreshold is met
+				if w.resultRun != i+1 {
+					t.Errorf("Prober resultRun should be %d, but %d", i+1, w.resultRun)
+				}
+			case 2:
+				// The expected result is Failure
+				// because w.resultsManager.Set() will be called due to resultRun reaching failureThreshold,
+				// updating the cached result to Failure.
+				// After that, resultRun will be reset to 0.
+				expectResult(t, w, results.Failure, msg)
+				// Meeting failureThreshold should cause resultRun to reset to 0
+				if w.resultRun != 0 {
+					t.Errorf("Prober resultRun should be 0, but %d", w.resultRun)
+				}
+			}
+		} else {
+			// Probe should be on hold and will not be executed anymore
+			// when failureThreshold is met
+			if !w.onHold {
+				t.Errorf("Prober should be on hold because failureThreshold is exceeded")
+			}
+			// Exceeding failureThreshold should cause resultRun to reset to 0
+			if w.resultRun != 0 {
+				t.Errorf("Prober resultRun should be 0, but %d", w.resultRun)
+			}
+		}
+	}
+}
+
 func TestCleanUp(t *testing.T) {
 	m := newTestManager()
 
@@ -366,8 +451,10 @@ func TestOnHoldOnLivenessOrStartupCheckFailure(t *testing.T) {
 		msg = "hold lifted"
 		expectContinue(t, w, w.doProbe(ctx), msg)
 		expectResult(t, w, results.Success, msg)
-		if w.onHold {
+		if probeType == liveness && w.onHold {
 			t.Errorf("Prober should not be on hold anymore")
+		} else if probeType == startup && !w.onHold {
+			t.Errorf("Prober should be on hold due to %s check success", probeType)
 		}
 	}
 }