Improve update time of readiness state

This commit is contained in:
Matthias Bertschy 2020-12-07 21:43:04 +01:00
parent 96be00df69
commit 4870e64ac1
3 changed files with 37 additions and 11 deletions

View File

@ -17,7 +17,9 @@ limitations under the License.
package prober
import (
"k8s.io/apimachinery/pkg/util/clock"
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
@ -94,6 +96,8 @@ type manager struct {
// prober executes the probe actions.
prober *prober
start time.Time
}
// NewManager creates a Manager for pod probing.
@ -113,6 +117,7 @@ func NewManager(
livenessManager: livenessManager,
startupManager: startupManager,
workers: make(map[probeKey]*worker),
start: clock.RealClock{}.Now(),
}
}
@ -253,8 +258,15 @@ func (m *manager) UpdatePodStatus(podUID types.UID, podStatus *v1.PodStatus) {
ready = result == results.Success
} else {
// The check whether there is a probe which hasn't run yet.
_, exists := m.getWorker(podUID, c.Name, readiness)
ready = !exists
w, exists := m.getWorker(podUID, c.Name, readiness)
ready = !exists // no readinessProbe -> always ready
if exists {
// Trigger an immediate run of the readinessProbe to update ready state
select {
case w.manualTriggerCh <- struct{}{}:
default: // Non-blocking.
}
}
}
podStatus.ContainerStatuses[i].Ready = ready
}

View File

@ -38,6 +38,9 @@ type worker struct {
// Channel for stopping the probe.
stopCh chan struct{}
// Channel for triggering the probe manually.
manualTriggerCh chan struct{}
// The pod containing this probe (read-only)
pod *v1.Pod
@ -82,11 +85,12 @@ func newWorker(
container v1.Container) *worker {
w := &worker{
stopCh: make(chan struct{}, 1), // Buffer so stop() can be non-blocking.
pod: pod,
container: container,
probeType: probeType,
probeManager: m,
stopCh: make(chan struct{}, 1), // Buffer so stop() can be non-blocking.
manualTriggerCh: make(chan struct{}, 1), // Buffer so prober_manager can do non-blocking calls to doProbe.
pod: pod,
container: container,
probeType: probeType,
probeManager: m,
}
switch probeType {
@ -130,7 +134,10 @@ func (w *worker) run() {
// If kubelet restarted the probes could be started in rapid succession.
// Let the worker wait for a random portion of tickerPeriod before probing.
time.Sleep(time.Duration(rand.Float64() * float64(probeTickerPeriod)))
// Do it only if the kubelet has started recently.
if probeTickerPeriod > time.Since(w.probeManager.start) {
time.Sleep(time.Duration(rand.Float64() * float64(probeTickerPeriod)))
}
probeTicker := time.NewTicker(probeTickerPeriod)
@ -154,6 +161,7 @@ probeLoop:
case <-w.stopCh:
break probeLoop
case <-probeTicker.C:
case <-w.manualTriggerCh:
// continue
}
}

View File

@ -374,7 +374,8 @@ var _ = SIGDescribe("Probing container", func() {
Description: A Pod is created with startup and readiness probes. The Container is started by creating /tmp/startup after 45 seconds, delaying the ready state by this amount of time. This is similar to the "Pod readiness probe, with initial delay" test.
*/
ginkgo.It("should not be ready until startupProbe succeeds", func() {
cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 45; echo ok >/tmp/startup; sleep 600"}
sleepBeforeStarted := time.Duration(45)
cmd := []string{"/bin/sh", "-c", fmt.Sprintf("echo ok >/tmp/health; sleep %d; echo ok >/tmp/startup; sleep 600", sleepBeforeStarted)}
readinessProbe := &v1.Probe{
Handler: v1.Handler{
Exec: &v1.ExecAction{
@ -382,6 +383,7 @@ var _ = SIGDescribe("Probing container", func() {
},
},
InitialDelaySeconds: 0,
PeriodSeconds: 60,
}
startupProbe := &v1.Probe{
Handler: v1.Handler{
@ -390,7 +392,8 @@ var _ = SIGDescribe("Probing container", func() {
},
},
InitialDelaySeconds: 0,
FailureThreshold: 60,
PeriodSeconds: 1,
FailureThreshold: 600,
}
p := podClient.Create(startupPodSpec(startupProbe, readinessProbe, nil, cmd))
@ -414,9 +417,12 @@ var _ = SIGDescribe("Probing container", func() {
framework.ExpectNoError(err)
framework.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime)
if readyTime.Sub(startedTime) < 40*time.Second {
if readyTime.Sub(startedTime) < sleepBeforeStarted*time.Second {
framework.Failf("Pod became ready before startupProbe succeeded")
}
if readyTime.Sub(startedTime) > (sleepBeforeStarted+20)*time.Second {
framework.Failf("Pod became ready more than 20s after startupProbe succeeded")
}
})
})