backoff runtime errors in kubelet sync loop

The runtime health check can race with PLEG's first relist, and this
often results in an unnecessary 5-second wait during Kubelet bootstrap.

This change replaces the fixed 5-second sleep with a capped exponential
backoff (100ms base, doubling up to a 5s cap, reset on success), so the
sync loop recovers quickly once the runtime reports healthy; a standalone
sketch of the pattern follows the diff below.
Author: Michael Taufen
Date:   2018-02-22 11:38:56 -08:00
parent  fd1527a977
commit  7290313dfd


@@ -20,6 +20,7 @@ import (
 	"context"
 	"crypto/tls"
 	"fmt"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -1759,12 +1760,22 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
 	housekeepingTicker := time.NewTicker(housekeepingPeriod)
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
+	const (
+		base   = 100 * time.Millisecond
+		max    = 5 * time.Second
+		factor = 2
+	)
+	duration := base
 	for {
 		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
-			time.Sleep(5 * time.Second)
+			// exponential backoff
+			time.Sleep(duration)
+			duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
 			continue
 		}
+		// reset backoff if we have a success
+		duration = base
 		kl.syncLoopMonitor.Store(kl.clock.Now())
 		if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
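
For reference, below is a minimal standalone sketch of the capped exponential
backoff pattern this diff introduces. The constants mirror the commit; the demo
loop, the attempt counter, and the printed output are illustrative only, not
kubelet code.

package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	const (
		base   = 100 * time.Millisecond
		max    = 5 * time.Second
		factor = 2
	)
	duration := base

	// Pretend the runtime keeps reporting errors for a few iterations:
	// sleep for the current backoff, then double it, capping it at max.
	for attempt := 1; attempt <= 8; attempt++ {
		fmt.Printf("attempt %d: backing off for %v\n", attempt, duration)
		time.Sleep(duration)
		duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
	}

	// Once an iteration succeeds, the backoff resets so a later transient
	// failure starts again from the base duration.
	duration = base
	fmt.Println("reset to", duration)
}

With these constants the wait sequence is 100ms, 200ms, 400ms, 800ms, 1.6s,
3.2s, and then stays capped at 5s, so the worst case matches the old fixed
sleep while the common case during bootstrap is much shorter; a successful
sync-loop iteration resets the backoff to the base duration.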