diff --git a/cluster/aws/templates/configure-vm-aws.sh b/cluster/aws/templates/configure-vm-aws.sh
index 6abca6ed436..0be5203add1 100755
--- a/cluster/aws/templates/configure-vm-aws.sh
+++ b/cluster/aws/templates/configure-vm-aws.sh
@@ -91,7 +91,6 @@ EOF
   if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
     cat <<EOF >>/etc/salt/minion.d/grains.conf
   kubelet_api_servers: '${KUBELET_APISERVER}'
-  cbr-cidr: 10.123.45.0/29
 EOF
   else
     # If the kubelet is running disconnected from a master, give it a fixed
@@ -110,7 +109,6 @@ salt-node-role() {
 grains:
   roles:
     - kubernetes-pool
-  cbr-cidr: 10.123.45.0/29
   cloud: aws
   api_servers: '${API_SERVERS}'
 EOF
diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh
index 968258edd87..609fd582d89 100755
--- a/cluster/gce/configure-vm.sh
+++ b/cluster/gce/configure-vm.sh
@@ -958,7 +958,6 @@ EOF
   if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
     cat <<EOF >>/etc/salt/minion.d/grains.conf
   kubelet_api_servers: '${KUBELET_APISERVER}'
-  cbr-cidr: 10.123.45.0/29
 EOF
   else
     # If the kubelet is running disconnected from a master, give it a fixed
@@ -977,7 +976,6 @@ function salt-node-role() {
 grains:
   roles:
     - kubernetes-pool
-  cbr-cidr: 10.123.45.0/29
   cloud: gce
   api_servers: '${KUBERNETES_MASTER_NAME}'
 EOF
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 583c8767cf3..b02f7846d95 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -483,11 +483,8 @@ function start-kubelet {
     if [[ ! -z "${KUBELET_APISERVER:-}" && ! -z "${KUBELET_CERT:-}" && ! -z "${KUBELET_KEY:-}" ]]; then
       flags+=" --api-servers=https://${KUBELET_APISERVER}"
       flags+=" --register-schedulable=false"
-      # need at least a /29 pod cidr for now due to #32844
-      # TODO: determine if we still allow non-hostnetwork pods to run on master, clean up master pod setup
-      # WARNING: potential ip range collision with 10.123.45.0/29
-      flags+=" --pod-cidr=10.123.45.0/29"
     else
+      # Standalone mode (not widely used?)
       flags+=" --pod-cidr=${MASTER_IP_RANGE}"
     fi
   else # For nodes
diff --git a/cluster/gce/trusty/configure-helper.sh b/cluster/gce/trusty/configure-helper.sh
index 9e04aafa206..33be3919f5c 100644
--- a/cluster/gce/trusty/configure-helper.sh
+++ b/cluster/gce/trusty/configure-helper.sh
@@ -155,7 +155,7 @@ assemble_kubelet_flags() {
   if [ ! -z "${KUBELET_APISERVER:-}" ] && \
      [ ! -z "${KUBELET_CERT:-}" ] && \
      [ ! -z "${KUBELET_KEY:-}" ]; then
-    KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --api-servers=https://${KUBELET_APISERVER} --register-schedulable=false --pod-cidr=10.123.45.0/29"
+    KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --api-servers=https://${KUBELET_APISERVER} --register-schedulable=false"
   else
     KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --pod-cidr=${MASTER_IP_RANGE}"
   fi
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 42054573b1f..e4fcaee1a7e 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1422,6 +1422,11 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 		return syncErr
 	}
 
+	// If the network plugin is not ready, only start the pod if it uses the host network
+	if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !podUsesHostNetwork(pod) {
+		return fmt.Errorf("network is not ready: %v", rs)
+	}
+
 	// Create Cgroups for the pod and apply resource parameters
 	// to them if cgroup-per-qos flag is enabled.
 	pcm := kl.containerManager.NewPodContainerManager()
@@ -1696,7 +1701,7 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
 	for {
-		if rs := kl.runtimeState.errors(); len(rs) != 0 {
+		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
 			time.Sleep(5 * time.Second)
 			continue
diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go
index cd4c6836361..d616f41d4bf 100644
--- a/pkg/kubelet/kubelet_node_status.go
+++ b/pkg/kubelet/kubelet_node_status.go
@@ -591,7 +591,8 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
 	// ref: https://github.com/kubernetes/kubernetes/issues/16961
 	currentTime := unversioned.NewTime(kl.clock.Now())
 	var newNodeReadyCondition api.NodeCondition
-	if rs := kl.runtimeState.errors(); len(rs) == 0 {
+	rs := append(kl.runtimeState.runtimeErrors(), kl.runtimeState.networkErrors()...)
+	if len(rs) == 0 {
 		newNodeReadyCondition = api.NodeCondition{
 			Type:   api.NodeReady,
 			Status: api.ConditionTrue,
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index e17807222d2..767f286d237 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -1054,6 +1054,48 @@ func TestPrivilegedContainerDisallowed(t *testing.T) {
 	assert.Error(t, err, "expected pod infra creation to fail")
 }
 
+func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	testKubelet.fakeCadvisor.On("VersionInfo").Return(&cadvisorapi.VersionInfo{}, nil)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil)
+	testKubelet.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
+	testKubelet.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
+	kubelet := testKubelet.kubelet
+
+	kubelet.runtimeState.setNetworkState(fmt.Errorf("simulated network error"))
+	capabilities.SetForTests(capabilities.Capabilities{
+		PrivilegedSources: capabilities.PrivilegedSources{
+			HostNetworkSources: []string{kubetypes.ApiserverSource, kubetypes.FileSource},
+		},
+	})
+
+	pod := podWithUidNameNsSpec("12345678", "hostnetwork", "new", api.PodSpec{
+		SecurityContext: &api.PodSecurityContext{
+			HostNetwork: false,
+		},
+		Containers: []api.Container{
+			{Name: "foo"},
+		},
+	})
+
+	kubelet.podManager.SetPods([]*api.Pod{pod})
+	err := kubelet.syncPod(syncPodOptions{
+		pod:        pod,
+		podStatus:  &kubecontainer.PodStatus{},
+		updateType: kubetypes.SyncPodUpdate,
+	})
+	assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error")
+
+	pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
+	pod.Spec.SecurityContext.HostNetwork = true
+	err = kubelet.syncPod(syncPodOptions{
+		pod:        pod,
+		podStatus:  &kubecontainer.PodStatus{},
+		updateType: kubetypes.SyncPodUpdate,
+	})
+	assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
+}
+
 func TestFilterOutTerminatedPods(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	kubelet := testKubelet.kubelet
diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go
index c611f5ad0f6..8862dd1da4a 100644
--- a/pkg/kubelet/runonce_test.go
+++ b/pkg/kubelet/runonce_test.go
@@ -83,6 +83,7 @@ func TestRunOnce(t *testing.T) {
 		kubeClient:       &fake.Clientset{},
 		hostname:         testKubeletHostname,
 		nodeName:         testKubeletHostname,
+		runtimeState:     newRuntimeState(time.Second),
 	}
 	kb.containerManager = cm.NewStubContainerManager()
 
diff --git a/pkg/kubelet/runtime.go b/pkg/kubelet/runtime.go
index 90a83898a31..6cb74fe364c 100644
--- a/pkg/kubelet/runtime.go
+++ b/pkg/kubelet/runtime.go
@@ -68,16 +68,13 @@ func (s *runtimeState) setInitError(err error) {
 	s.initError = err
 }
 
-func (s *runtimeState) errors() []string {
+func (s *runtimeState) runtimeErrors() []string {
 	s.RLock()
 	defer s.RUnlock()
 	var ret []string
 	if s.initError != nil {
 		ret = append(ret, s.initError.Error())
 	}
-	if s.networkError != nil {
-		ret = append(ret, s.networkError.Error())
-	}
 	if !s.lastBaseRuntimeSync.Add(s.baseRuntimeSyncThreshold).After(time.Now()) {
 		ret = append(ret, "container runtime is down")
 	}
@@ -87,6 +84,16 @@ func (s *runtimeState) errors() []string {
 	return ret
 }
 
+func (s *runtimeState) networkErrors() []string {
+	s.RLock()
+	defer s.RUnlock()
+	var ret []string
+	if s.networkError != nil {
+		ret = append(ret, s.networkError.Error())
+	}
+	return ret
+}
+
 func newRuntimeState(
 	runtimeSyncThreshold time.Duration,
 ) *runtimeState {
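
Note: the new guard in syncPod calls podUsesHostNetwork, which is not touched by this patch. A minimal sketch of what that helper presumably checks, assuming the legacy pkg/api types used elsewhere in this change (where HostNetwork is carried on pod.Spec.SecurityContext):

// Sketch only -- not part of this patch; inferred from how the test above
// toggles pod.Spec.SecurityContext.HostNetwork.
func podUsesHostNetwork(pod *api.Pod) bool {
	return pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostNetwork
}

With that check in place, a network-plugin error recorded via setNetworkState blocks only pods that need pod networking; host-network pods (such as the master's static pods) still sync, which is what lets the hardcoded 10.123.45.0/29 master pod CIDR be dropped from the cluster scripts.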
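
For context on the split accessors, this is the shape of the runtimeState receiver implied by the calls in this patch (fields inferred from s.RLock, s.initError, s.networkError, s.lastBaseRuntimeSync, and s.baseRuntimeSyncThreshold; a sketch, not the authoritative definition):

// Inferred sketch of the receiver used by runtimeErrors()/networkErrors().
// Only fields referenced in this patch are shown; the real struct may have more.
type runtimeState struct {
	sync.RWMutex                           // guards the fields below
	lastBaseRuntimeSync      time.Time     // last successful container-runtime sync
	baseRuntimeSyncThreshold time.Duration // staleness window before "container runtime is down"
	networkError             error         // set by setNetworkState, reported by networkErrors()
	initError                error         // set by setInitError, reported by runtimeErrors()
}

Splitting errors() into runtimeErrors() and networkErrors() lets syncLoop keep running when only the network is broken, while setNodeReadyCondition still aggregates both lists when deciding node readiness.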