backoff runtime errors in kubelet sync loop

The runtime health check can race with PLEG's first relist, and this
often results in an unnecessary 5-second wait during Kubelet bootstrap.

This change replaces the fixed 5-second sleep with a capped exponential
backoff (100ms base, doubling up to a 5s cap, reset on success), so the
sync loop recovers quickly once the runtime reports healthy; a standalone
sketch of the pattern follows the diff below.
Author: Michael Taufen
Date:   2018-02-22 11:38:56 -08:00
parent  fd1527a977
commit  7290313dfd


@@ -20,6 +20,7 @@ import (
 	"context"
 	"crypto/tls"
 	"fmt"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -1759,12 +1760,22 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
 	housekeepingTicker := time.NewTicker(housekeepingPeriod)
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
+	const (
+		base   = 100 * time.Millisecond
+		max    = 5 * time.Second
+		factor = 2
+	)
+	duration := base
 	for {
 		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
-			time.Sleep(5 * time.Second)
+			// exponential backoff
+			time.Sleep(duration)
+			duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
 			continue
 		}
+		// reset backoff if we have a success
+		duration = base
 		kl.syncLoopMonitor.Store(kl.clock.Now())
 		if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
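
For reference, below is a minimal standalone sketch of the capped exponential
backoff pattern this diff introduces. The constants mirror the commit; the demo
loop, the attempt counter, and the printed output are illustrative only, not
kubelet code.

package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	const (
		base   = 100 * time.Millisecond
		max    = 5 * time.Second
		factor = 2
	)
	duration := base

	// Pretend the runtime keeps reporting errors for a few iterations:
	// sleep for the current backoff, then double it, capping it at max.
	for attempt := 1; attempt <= 8; attempt++ {
		fmt.Printf("attempt %d: backing off for %v\n", attempt, duration)
		time.Sleep(duration)
		duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
	}

	// Once an iteration succeeds, the backoff resets so a later transient
	// failure starts again from the base duration.
	duration = base
	fmt.Println("reset to", duration)
}

With these constants the wait sequence is 100ms, 200ms, 400ms, 800ms, 1.6s,
3.2s, and then stays capped at 5s, so the worst case matches the old fixed
sleep while the common case during bootstrap is much shorter; a successful
sync-loop iteration resets the backoff to the base duration.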