From 7290313dfdda4554e19d06e14d8a554e9e884615 Mon Sep 17 00:00:00 2001
From: Michael Taufen
Date: Thu, 22 Feb 2018 11:38:56 -0800
Subject: [PATCH] backoff runtime errors in kubelet sync loop

The runtime health check can race with PLEG's first relist, and this
often results in an unnecessary 5 second wait during Kubelet bootstrap.
This change replaces the fixed 5 second sleep with an exponential
backoff (100ms base, 5s cap) so the sync loop resumes quickly once the
runtime becomes healthy.
---
 pkg/kubelet/kubelet.go | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 667eeae8538..6532eb19d3d 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"crypto/tls"
 	"fmt"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -1759,12 +1760,22 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
 	housekeepingTicker := time.NewTicker(housekeepingPeriod)
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
+	const (
+		base   = 100 * time.Millisecond
+		max    = 5 * time.Second
+		factor = 2
+	)
+	duration := base
 	for {
 		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
-			time.Sleep(5 * time.Second)
+			// exponential backoff
+			time.Sleep(duration)
+			duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
 			continue
 		}
+		// reset backoff if we have a success
+		duration = base
 		kl.syncLoopMonitor.Store(kl.clock.Now())
 		if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
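
For illustration only, and not part of the patch above: a minimal, standalone Go sketch of the backoff progression this change introduces, using the same base/max/factor constants from the diff. Doubling from a 100ms base means the kubelet resumes syncing within roughly a tenth of a second once the runtime reports healthy, while the 5s cap keeps the retry rate no higher than the previous fixed sleep when the runtime stays broken.

// Illustration only; not from kubelet.go. Prints the sleep durations the
// sync loop would use for ten consecutive runtime errors.
package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	const (
		base   = 100 * time.Millisecond
		max    = 5 * time.Second
		factor = 2
	)
	duration := base
	for i := 1; i <= 10; i++ {
		fmt.Printf("error %d: sleep %v\n", i, duration)
		// Same update rule as the patch: double, but never exceed max.
		duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
	}
}

Running this prints 100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s, and then 5s for every subsequent error.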