diff --git a/cluster/saltbase/salt/docker/docker-defaults b/cluster/saltbase/salt/docker/docker-defaults index 7e5725064d1..f325b4945d5 100644 --- a/cluster/saltbase/salt/docker/docker-defaults +++ b/cluster/saltbase/salt/docker/docker-defaults @@ -2,6 +2,5 @@ DOCKER_OPTS="" {% if grains.docker_opts is defined and grains.docker_opts %} DOCKER_OPTS="${DOCKER_OPTS} {{grains.docker_opts}}" {% endif %} - DOCKER_OPTS="${DOCKER_OPTS} --bridge=cbr0 --iptables=false --ip-masq=false" DOCKER_NOFILE=1000000 diff --git a/contrib/mesos/pkg/executor/service/service.go b/contrib/mesos/pkg/executor/service/service.go index dd3b6605b9e..06ffbd6255d 100644 --- a/contrib/mesos/pkg/executor/service/service.go +++ b/contrib/mesos/pkg/executor/service/service.go @@ -354,6 +354,7 @@ func (ks *KubeletExecutorServer) createAndInitKubelet( kc.DockerDaemonContainer, kc.SystemContainer, kc.ConfigureCBR0, + kc.PodCIDR, kc.MaxPods, kc.DockerExecHandler, ) diff --git a/pkg/kubelet/container_bridge.go b/pkg/kubelet/container_bridge.go index 0aaa885195c..5fb6319ec5a 100644 --- a/pkg/kubelet/container_bridge.go +++ b/pkg/kubelet/container_bridge.go @@ -23,6 +23,7 @@ import ( "os/exec" "regexp" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/golang/glog" ) @@ -43,10 +44,16 @@ func createCBR0(wantCIDR *net.IPNet) error { return err } // restart docker - if err := exec.Command("service", "docker", "restart").Run(); err != nil { - glog.Error(err) - // For now just log the error. The containerRuntime check will catch docker failures. - // TODO (dawnchen) figure out what we should do for rkt here. + // For now just log the error. The containerRuntime check will catch docker failures. + // TODO (dawnchen) figure out what we should do for rkt here. 
+ if util.UsingSystemdInitSystem() { + if err := exec.Command("systemctl", "restart", "docker").Run(); err != nil { + glog.Error(err) + } + } else { + if err := exec.Command("service", "docker", "restart").Run(); err != nil { + glog.Error(err) + } } glog.V(2).Info("Recreated cbr0 and restarted docker") return nil @@ -60,7 +67,8 @@ func ensureCbr0(wantCIDR *net.IPNet) error { if !exists { glog.V(2).Infof("CBR0 doesn't exist, attempting to create it with range: %s", wantCIDR) return createCBR0(wantCIDR) - } else if !cbr0CidrCorrect(wantCIDR) { + } + if !cbr0CidrCorrect(wantCIDR) { glog.V(2).Infof("Attempting to recreate cbr0 with address range: %s", wantCIDR) // delete cbr0 @@ -78,8 +86,7 @@ func ensureCbr0(wantCIDR *net.IPNet) error { } func cbr0Exists() (bool, error) { - _, err := os.Stat("/sys/class/net/cbr0") - if err != nil { + if _, err := os.Stat("/sys/class/net/cbr0"); err != nil { if os.IsNotExist(err) { return false, nil } @@ -103,6 +110,7 @@ func cbr0CidrCorrect(wantCIDR *net.IPNet) bool { return false } cbr0CIDR.IP = cbr0IP + glog.V(5).Infof("Want cbr0 CIDR: %s, have cbr0 CIDR: %s", wantCIDR, cbr0CIDR) return wantCIDR.IP.Equal(cbr0IP) && bytes.Equal(wantCIDR.Mask, cbr0CIDR.Mask) } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 537c95bed48..9c0e4260748 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -321,8 +321,8 @@ func NewMainKubelet( klet.containerManager = containerManager // Start syncing node status immediately, this may set up things the runtime needs to run. + go util.Until(klet.syncNetworkStatus, 30*time.Second, util.NeverStop) go klet.syncNodeStatus() - go klet.syncNetworkStatus() // Wait for the runtime to be up with a timeout. 
if err := waitUntilRuntimeIsUp(klet.containerRuntime, maxWaitForContainerRuntime); err != nil { @@ -419,7 +419,8 @@ type Kubelet struct { lastTimestampRuntimeUp time.Time // Network Status information - networkConfigured bool + networkConfigMutex sync.Mutex + networkConfigured bool // Volume plugins. volumePluginMgr volume.VolumePluginMgr @@ -717,6 +718,7 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) { } go util.Until(kl.updateRuntimeUp, 5*time.Second, util.NeverStop) + // Run the system oom watcher forever. kl.statusManager.Start() kl.syncLoop(updates, kl) @@ -1714,9 +1716,10 @@ func (kl *Kubelet) syncLoopIteration(updates <-chan PodUpdate, handler SyncHandl glog.Infof("Skipping pod synchronization, container runtime is not up.") return } - if !kl.networkConfigured { + if !kl.doneNetworkConfigure() { time.Sleep(5 * time.Second) glog.Infof("Skipping pod synchronization, network is not configured") + return } unsyncedPod := false podSyncTypes := make(map[types.UID]SyncPodType) @@ -1871,6 +1874,7 @@ func (kl *Kubelet) reconcileCBR0(podCIDR string) error { glog.V(5).Info("PodCIDR not set. 
Will not configure cbr0.") return nil } + glog.V(5).Infof("PodCIDR is set to %q", podCIDR) _, cidr, err := net.ParseCIDR(podCIDR) if err != nil { return err @@ -1906,19 +1910,19 @@ func (kl *Kubelet) recordNodeStatusEvent(event string) { var oldNodeUnschedulable bool func (kl *Kubelet) syncNetworkStatus() { - for { - networkConfigured := true - if kl.configureCBR0 { - if len(kl.podCIDR) == 0 { - networkConfigured = false - } else if err := kl.reconcileCBR0(kl.podCIDR); err != nil { - networkConfigured = false - glog.Errorf("Error configuring cbr0: %v", err) - } + kl.networkConfigMutex.Lock() + defer kl.networkConfigMutex.Unlock() + + networkConfigured := true + if kl.configureCBR0 { + if len(kl.podCIDR) == 0 { + networkConfigured = false + } else if err := kl.reconcileCBR0(kl.podCIDR); err != nil { + networkConfigured = false + glog.Errorf("Error configuring cbr0: %v", err) } - kl.networkConfigured = networkConfigured - time.Sleep(30 * time.Second) } + kl.networkConfigured = networkConfigured } // setNodeStatus fills in the Status fields of the given Node, overwriting @@ -1997,11 +2001,13 @@ func (kl *Kubelet) setNodeStatus(node *api.Node) error { // Check whether container runtime can be reported as up. 
containerRuntimeUp := kl.containerRuntimeUp() + // Check whether network is configured properly + networkConfigured := kl.doneNetworkConfigure() currentTime := util.Now() var newNodeReadyCondition api.NodeCondition var oldNodeReadyConditionStatus api.ConditionStatus - if containerRuntimeUp && kl.networkConfigured { + if containerRuntimeUp && networkConfigured { newNodeReadyCondition = api.NodeCondition{ Type: api.NodeReady, Status: api.ConditionTrue, @@ -2013,7 +2019,7 @@ func (kl *Kubelet) setNodeStatus(node *api.Node) error { if !containerRuntimeUp { reasons = append(reasons, "container runtime is down") } - if !kl.networkConfigured { + if !networkConfigured { reasons = append(reasons, "network not configured correctly") } newNodeReadyCondition = api.NodeCondition{ @@ -2065,6 +2071,12 @@ func (kl *Kubelet) containerRuntimeUp() bool { return kl.lastTimestampRuntimeUp.Add(kl.runtimeUpThreshold).After(time.Now()) } +func (kl *Kubelet) doneNetworkConfigure() bool { + kl.networkConfigMutex.Lock() + defer kl.networkConfigMutex.Unlock() + return kl.networkConfigured +} + // tryUpdateNodeStatus tries to update node status to master. If ReconcileCBR0 // is set, this function will also confirm that cbr0 is configured correctly. 
func (kl *Kubelet) tryUpdateNodeStatus() error { diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 96183eda521..4957f57906d 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -127,6 +127,7 @@ func newTestKubelet(t *testing.T) *TestKubelet { } kubelet.volumeManager = newVolumeManager() kubelet.containerManager, _ = newContainerManager(mockCadvisor, "", "", "") + kubelet.networkConfigured = true return &TestKubelet{kubelet, fakeRuntime, mockCadvisor, fakeKubeClient, fakeMirrorClient} } diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go index d8bf9bbb4e9..15495f0a41a 100644 --- a/pkg/kubelet/status_manager.go +++ b/pkg/kubelet/status_manager.go @@ -21,7 +21,6 @@ import ( "fmt" "reflect" "sync" - "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" @@ -60,8 +59,6 @@ func (s *statusManager) Start() { err := s.syncBatch() if err != nil { glog.Warningf("Failed to updated pod status: %v", err) - // Errors and tight-looping are bad, m-kay - time.Sleep(30 * time.Second) } }, 0) } diff --git a/pkg/util/util.go b/pkg/util/util.go index 17f56187ea4..fc335b95f8a 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -198,6 +198,20 @@ func CompileRegexps(regexpStrings []string) ([]*regexp.Regexp, error) { return regexps, nil } +// UsingSystemdInitSystem reports whether systemd is the init system, by checking for /run/systemd/system. +// Please note that simply reading /proc/1/cmdline can be misleading because +// some installations of various init programs can automatically make /sbin/init +// a symlink or even a renamed version of their main program. +// TODO(dchen1107): reliably detect the init system in use on the system: +// systemd, upstart, initd, etc. +func UsingSystemdInitSystem() bool { + if _, err := os.Stat("/run/systemd/system"); err == nil { + return true + } + + return false +} + // Writes 'value' to /proc/<pid>/oom_score_adj.
PID = 0 means self func ApplyOomScoreAdj(pid int, value int) error { if value < -1000 || value > 1000 {