From 7290313dfdda4554e19d06e14d8a554e9e884615 Mon Sep 17 00:00:00 2001
From: Michael Taufen
Date: Thu, 22 Feb 2018 11:38:56 -0800
Subject: [PATCH] backoff runtime errors in kubelet sync loop

The runtime health check can race with PLEG's first relist, and this
often results in an unnecessary 5 second wait during Kubelet bootstrap.
This change replaces the fixed 5 second sleep with an exponential
backoff (100ms base, 5s cap) so the sync loop resumes quickly once the
runtime becomes healthy.
---
 pkg/kubelet/kubelet.go | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 667eeae8538..6532eb19d3d 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"crypto/tls"
 	"fmt"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -1759,12 +1760,22 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
 	housekeepingTicker := time.NewTicker(housekeepingPeriod)
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
+	const (
+		base   = 100 * time.Millisecond
+		max    = 5 * time.Second
+		factor = 2
+	)
+	duration := base
 	for {
 		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
-			time.Sleep(5 * time.Second)
+			// exponential backoff
+			time.Sleep(duration)
+			duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
 			continue
 		}
+		// reset backoff if we have a success
+		duration = base
 		kl.syncLoopMonitor.Store(kl.clock.Now())
 		if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
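
For illustration only, and not part of the patch above: a minimal, standalone Go sketch of the backoff progression this change introduces, using the same base/max/factor constants from the diff. Doubling from a 100ms base means the kubelet resumes syncing within roughly a tenth of a second once the runtime reports healthy, while the 5s cap keeps the retry rate no higher than the previous fixed sleep when the runtime stays broken.

// Illustration only; not from kubelet.go. Prints the sleep durations the
// sync loop would use for ten consecutive runtime errors.
package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	const (
		base   = 100 * time.Millisecond
		max    = 5 * time.Second
		factor = 2
	)
	duration := base
	for i := 1; i <= 10; i++ {
		fmt.Printf("error %d: sleep %v\n", i, duration)
		// Same update rule as the patch: double, but never exceed max.
		duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
	}
}

Running this prints 100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s, and then 5s for every subsequent error.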