Merge pull request #45110 from smarterclayton/offset_timeouts

Automatic merge from submit-queue (batch tested with PRs 45110, 45148)

Make timeouts in the Kubelet slightly offset to aid debugging

Several of these loops overlap, and when they are the reason a failure
is happening it is difficult to sort them out. Slightly misalign these
loops to make their impact obvious.

We are seeing exactly 2-minute pod worker timeouts in a wide range of test flake scenarios, and I want to be confident we know exactly which one is the culprit.
This commit is contained in:
Kubernetes Submit Queue 2017-05-01 05:42:14 -07:00 committed by GitHub
commit 6480bc70b0
5 changed files with 15 additions and 5 deletions

View File

@ -70,7 +70,9 @@ var _ DockerInterface = &kubeDockerClient{}
// kubeDockerClient only applies timeout on non-long running operations.
const (
// defaultTimeout is the default timeout of short running docker operations.
defaultTimeout = 2 * time.Minute
// Value is slightly offset from 2 minutes to make timeouts due to this
// constant recognizable.
defaultTimeout = 2*time.Minute - 1*time.Second
// defaultShmSize is the default ShmSize to use (in bytes) if not specified.
defaultShmSize = int64(1024 * 1024 * 64)

View File

@ -135,7 +135,9 @@ const (
defaultNetworkName = "rkt.kubernetes.io"
// defaultRequestTimeout is the default timeout of rkt requests.
defaultRequestTimeout = 2 * time.Minute
// Value is slightly offset from 2 minutes to make timeouts due to this
// constant recognizable.
defaultRequestTimeout = 2*time.Minute - 1*time.Second
etcHostsPath = "/etc/hosts"
etcResolvConfPath = "/etc/resolv.conf"

View File

@ -71,7 +71,9 @@ const (
// will retry in the next sync iteration. This frees the associated
// goroutine of the pod to process newer updates if needed (e.g., a delete
// request to the pod).
podAttachAndMountTimeout time.Duration = 2 * time.Minute
// Value is slightly offset from 2 minutes to make timeouts due to this
// constant recognizable.
podAttachAndMountTimeout time.Duration = 2*time.Minute + 3*time.Second
// podAttachAndMountRetryInterval is the amount of time the GetVolumesForPod
// call waits before retrying

View File

@ -32,7 +32,9 @@ const (
// maxDurationBeforeRetry is the maximum amount of time that
// durationBeforeRetry will grow to due to exponential backoff.
maxDurationBeforeRetry time.Duration = 2 * time.Minute
// Value is slightly offset from 2 minutes to make timeouts due to this
// constant recognizable.
maxDurationBeforeRetry time.Duration = 2*time.Minute + 2*time.Second
)
// ExponentialBackoff contains the last occurrence of an error and the duration

View File

@ -40,7 +40,9 @@ const (
// maxDurationBeforeRetry is the maximum amount of time that
// durationBeforeRetry will grow to due to exponential backoff.
maxDurationBeforeRetry = 2 * time.Minute
// Value is slightly offset from 2 minutes to make timeouts due to this
// constant recognizable.
maxDurationBeforeRetry = 2*time.Minute + 1*time.Second
)
// GoRoutineMap defines a type that can run named goroutines and track their