From ad330f7dbe85a6ebc4629e1ff2da2f62b20a254a Mon Sep 17 00:00:00 2001 From: Krzysztof Jastrzebski Date: Mon, 17 Sep 2018 20:42:18 +0200 Subject: [PATCH] Start synchronizing pods after network is ready. --- pkg/kubelet/BUILD | 1 + pkg/kubelet/errors.go | 21 +++++++++++++++++++++ pkg/kubelet/kubelet.go | 4 ++-- pkg/kubelet/pod_workers.go | 9 ++++++++- 4 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 pkg/kubelet/errors.go diff --git a/pkg/kubelet/BUILD b/pkg/kubelet/BUILD index 62e4602f929..4bb15d0ecc5 100644 --- a/pkg/kubelet/BUILD +++ b/pkg/kubelet/BUILD @@ -11,6 +11,7 @@ go_library( srcs = [ "active_deadline.go", "doc.go", + "errors.go", "kubelet.go", "kubelet_getters.go", "kubelet_network.go", diff --git a/pkg/kubelet/errors.go b/pkg/kubelet/errors.go new file mode 100644 index 00000000000..eae36e5ee59 --- /dev/null +++ b/pkg/kubelet/errors.go @@ -0,0 +1,21 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +const ( + NetworkNotReadyErrorMsg = "network is not ready" +) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 2888c1736a8..27c0a835aa1 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1561,8 +1561,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error { // If the network plugin is not ready, only start the pod if it uses the host network if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !kubecontainer.IsHostNetworkPod(pod) { - kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "network is not ready: %v", rs) - return fmt.Errorf("network is not ready: %v", rs) + kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "%s: %v", NetworkNotReadyErrorMsg, rs) + return fmt.Errorf("%s: %v", NetworkNotReadyErrorMsg, rs) } // Create Cgroups for the pod and apply resource parameters diff --git a/pkg/kubelet/pod_workers.go b/pkg/kubelet/pod_workers.go index 5a1fb992725..a42589a99c5 100644 --- a/pkg/kubelet/pod_workers.go +++ b/pkg/kubelet/pod_workers.go @@ -18,6 +18,7 @@ package kubelet import ( "fmt" + "strings" "sync" "time" @@ -96,8 +97,11 @@ const ( // jitter factor for resyncInterval workerResyncIntervalJitterFactor = 0.5 - // jitter factor for backOffPeriod + // jitter factor for backOffPeriod and backOffOnTransientErrorPeriod workerBackOffPeriodJitterFactor = 0.5 + + // backoff period when transient error occurred. + backOffOnTransientErrorPeriod = time.Second ) type podWorkers struct { @@ -263,6 +267,9 @@ func (p *podWorkers) wrapUp(uid types.UID, syncErr error) { case syncErr == nil: // No error; requeue at the regular resync interval. p.workQueue.Enqueue(uid, wait.Jitter(p.resyncInterval, workerResyncIntervalJitterFactor)) + case strings.Contains(syncErr.Error(), NetworkNotReadyErrorMsg): + // Network is not ready; back off for short period of time and retry as network might be ready soon. + p.workQueue.Enqueue(uid, wait.Jitter(backOffOnTransientErrorPeriod, workerBackOffPeriodJitterFactor)) default: // Error occurred during the sync; back off and then retry. p.workQueue.Enqueue(uid, wait.Jitter(p.backOffPeriod, workerBackOffPeriodJitterFactor))