Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-06 02:34:03 +00:00
Merge pull request #22107 from yujuhong/stop_probing
Auto commit by PR queue bot
Commit: e7fb1a29e1
pkg/kubelet/prober/worker.go

@@ -61,6 +61,9 @@ type worker struct {
     lastResult results.Result
     // How many times in a row the probe has returned the same result.
     resultRun int
+
+    // If set, skip probing.
+    onHold bool
 }
 
 // Creates and starts a new probe worker.
@@ -165,6 +168,13 @@ func (w *worker) doProbe() (keepGoing bool) {
         }
         w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
         w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
+        // We've got a new container; resume probing.
+        w.onHold = false
+    }
+
+    if w.onHold {
+        // Worker is on hold until there is a new container.
+        return true
     }
 
     if c.State.Running == nil {
@@ -203,5 +213,13 @@ func (w *worker) doProbe() (keepGoing bool) {
 
     w.resultsManager.Set(w.containerID, result, w.pod)
+
+    if w.probeType == liveness && result == results.Failure {
+        // The container failed a liveness check; it will need to be restarted.
+        // Stop probing until we see a new container ID. This is to reduce the
+        // chance of hitting #21751, where running `docker exec` when a
+        // container is being stopped may lead to corrupted container state.
+        w.onHold = true
+    }
 
     return true
 }
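Taken together, the worker.go hunks implement a small hold/resume state machine: a liveness failure puts the worker on hold, and only a new container ID resumes probing. Below is a minimal, runnable Go sketch of that pattern; the types are simplified stand-ins (plain string IDs and an int result), not the real kubecontainer.ContainerID or results.Manager APIs.

package main

import "fmt"

type result int

const (
    success result = iota
    failure
)

type worker struct {
    containerID string
    lastResult  result
    onHold      bool // if set, skip probing
}

// doProbe mirrors the patched control flow: a new container ID lifts the
// hold, probing is skipped while on hold, and a failure re-arms the hold.
func (w *worker) doProbe(currentID string, probe func() result) result {
    if w.containerID != currentID {
        w.containerID = currentID
        // We've got a new container; resume probing.
        w.onHold = false
    }
    if w.onHold {
        // Worker is on hold until there is a new container.
        return w.lastResult
    }
    w.lastResult = probe()
    if w.lastResult == failure {
        // Stop probing until we see a new container ID (see #21751).
        w.onHold = true
    }
    return w.lastResult
}

func main() {
    w := &worker{}
    fail := func() result { return failure }
    ok := func() result { return success }

    fmt.Println(w.doProbe("cont-1", fail)) // 1: failure recorded; worker goes on hold
    fmt.Println(w.doProbe("cont-1", ok))   // 1: probe skipped while held, stale failure returned
    fmt.Println(w.doProbe("cont-2", ok))   // 0: new container ID lifted the hold
}

The main sequence mirrors the three phases that TestOnHoldOnLivenessCheckFailure (below) exercises: fail and hold, skip while held, resume on a new container ID.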
pkg/kubelet/prober/worker_test.go

@@ -275,7 +275,7 @@ func TestHandleCrash(t *testing.T) {
 }
 
 func expectResult(t *testing.T, w *worker, expectedResult results.Result, msg string) {
-    result, ok := resultsManager(w.probeManager, w.probeType).Get(testContainerID)
+    result, ok := resultsManager(w.probeManager, w.probeType).Get(w.containerID)
     if !ok {
         t.Errorf("[%s - %s] Expected result to be set, but was not set", w.probeType, msg)
     } else if result != expectedResult {
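This one-line change is what lets the new test below pass: probe results are kept per container ID, and the test swaps in a new container ID partway through, so expectResult must key its lookup on the worker's current w.containerID rather than the fixed testContainerID. A minimal sketch of the stale-key problem, using a hypothetical map in place of the real results manager (the "test://cont_ID" value is assumed for illustration):

package main

import "fmt"

// Hypothetical stand-in for the results manager: probe outcome keyed by
// container ID.
type cache map[string]string

func main() {
    results := cache{}

    // The worker records a failure for the original container ID.
    results["test://cont_ID"] = "failure"

    // The container is replaced; the worker now writes under the new ID.
    results["test://newCont_ID"] = "success"

    // A lookup pinned to the old fixed ID reads a stale entry,
    fmt.Println(results["test://cont_ID"]) // failure (stale)
    // while keying on the worker's current ID sees the fresh result.
    fmt.Println(results["test://newCont_ID"]) // success
}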
@@ -305,3 +305,38 @@ type crashingExecProber struct{}
 func (p crashingExecProber) Probe(_ exec.Cmd) (probe.Result, string, error) {
     panic("Intentional Probe crash.")
 }
+
+func TestOnHoldOnLivenessCheckFailure(t *testing.T) {
+    m := newTestManager()
+    w := newTestWorker(m, liveness, api.Probe{SuccessThreshold: 1, FailureThreshold: 1})
+    status := getTestRunningStatus()
+    m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())
+
+    // First probe should fail.
+    m.prober.exec = fakeExecProber{probe.Failure, nil}
+    msg := "first probe"
+    expectContinue(t, w, w.doProbe(), msg)
+    expectResult(t, w, results.Failure, msg)
+    if !w.onHold {
+        t.Errorf("Prober should be on hold due to liveness check failure")
+    }
+    // Set fakeExecProber to return success. However, the result will remain
+    // failure because the worker is on hold and won't probe.
+    m.prober.exec = fakeExecProber{probe.Success, nil}
+    msg = "while on hold"
+    expectContinue(t, w, w.doProbe(), msg)
+    expectResult(t, w, results.Failure, msg)
+    if !w.onHold {
+        t.Errorf("Prober should be on hold due to liveness check failure")
+    }
+
+    // Set a new container ID to lift the hold. The next probe will succeed.
+    status.ContainerStatuses[0].ContainerID = "test://newCont_ID"
+    m.statusManager.SetPodStatus(w.pod, status)
+    msg = "hold lifted"
+    expectContinue(t, w, w.doProbe(), msg)
+    expectResult(t, w, results.Success, msg)
+    if w.onHold {
+        t.Errorf("Prober should not be on hold anymore")
+    }
+}