From 9aa0efa393a60a632ae023973fa8611a6cd15d1e Mon Sep 17 00:00:00 2001 From: Prashanth Balasubramanian Date: Mon, 23 Nov 2015 18:11:51 -0800 Subject: [PATCH] Turn flannel off by default --- cluster/gce/config-default.sh | 2 +- cluster/gce/config-test.sh | 4 +- .../flannel-server/flannel-server.manifest | 12 ++--- .../saltbase/salt/flannel-server/network.json | 4 +- cluster/saltbase/salt/flannel/init.sls | 2 + .../kube-controller-manager.manifest | 8 +++- cluster/saltbase/salt/kubelet/default | 7 ++- cluster/saltbase/salt/top.sls | 4 ++ .../app/controllermanager.go | 5 ++- cmd/kubelet/app/server.go | 23 ++++------ docs/admin/kubelet.md | 4 +- hack/verify-flags/known-flags.txt | 2 +- pkg/kubelet/flannel_helper.go | 11 +++-- pkg/kubelet/kubelet.go | 44 ++++++++++++++----- 14 files changed, 83 insertions(+), 49 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 8fd2fa8422f..3fb21a0f062 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -113,7 +113,7 @@ ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,ServiceAccount,ResourceQuota KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false} # OpenContrail networking plugin specific settings -NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail +NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail, flannel OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}" OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}" OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 5eb4046ecc7..d0d4153ace3 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -125,12 +125,10 @@ KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false} TEST_CLUSTER="${TEST_CLUSTER:-true}" # OpenContrail networking plugin specific settings -NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail +NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail, flannel OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}" OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}" OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" # Optional: if set to true, kube-up will configure the cluster to run e2e tests.
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} -# Overlay network settings -OVERLAY_NETWORK=${OVERLAY_NETWORK:-true} diff --git a/cluster/saltbase/salt/flannel-server/flannel-server.manifest b/cluster/saltbase/salt/flannel-server/flannel-server.manifest index a0811ec0837..906d1354c13 100644 --- a/cluster/saltbase/salt/flannel-server/flannel-server.manifest +++ b/cluster/saltbase/salt/flannel-server/flannel-server.manifest @@ -2,17 +2,17 @@ "kind": "Pod", "apiVersion": "v1", "metadata": { - "name": "flannel-helper", + "name": "flannel-server", "namespace": "kube-system", "labels": { - "app": "flannel-helper", + "app": "flannel-server", "version": "v0.1" } }, "spec": { "volumes": [ { - "name": "varlogflannel", + "name": "varlog", "hostPath": { "path": "/var/log" } @@ -30,8 +30,8 @@ ], "containers": [ { - "name": "flannel-helper", - "image": "bprashanth/flannel-helper:0.1", + "name": "flannel-server-helper", + "image": "gcr.io/google_containers/flannel-server-helper:0.1", "args": [ "--network-config=/etc/kubernetes/network.json", "--etcd-prefix=/kubernetes.io/network", @@ -66,7 +66,7 @@ }, "volumeMounts": [ { - "name": "varlogflannel", + "name": "varlog", "mountPath": "/var/log" } ] diff --git a/cluster/saltbase/salt/flannel-server/network.json b/cluster/saltbase/salt/flannel-server/network.json index 2d199ea86e1..b0a6bd4560a 100644 --- a/cluster/saltbase/salt/flannel-server/network.json +++ b/cluster/saltbase/salt/flannel-server/network.json @@ -1,6 +1,6 @@ { - "Network": "192.168.0.0/16", - "SubnetLen": 26, + "Network": "172.16.0.0/12", + "SubnetLen": 24, "Backend": { "Type": "vxlan", "VNI": 1 diff --git a/cluster/saltbase/salt/flannel/init.sls b/cluster/saltbase/salt/flannel/init.sls index 8f36c953d3a..ee746d241ae 100644 --- a/cluster/saltbase/salt/flannel/init.sls +++ b/cluster/saltbase/salt/flannel/init.sls @@ -1,3 +1,5 @@ +# TODO: Run flannel daemon in a static pod once we've moved the overlay network +# setup into a network plugin. flannel-tar: archive: - extracted diff --git a/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest b/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest index cc68b0595e3..d3df6419226 100644 --- a/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest +++ b/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest @@ -10,7 +10,11 @@ {% if pillar['cluster_cidr'] is defined and pillar['cluster_cidr'] != "" -%} {% set cluster_cidr = "--cluster-cidr=" + pillar['cluster_cidr'] -%} {% endif -%} -{% if pillar['allocate_node_cidrs'] is defined -%} +# When we're using flannel it is responsible for cidr allocation. +# This is expected to be a short-term compromise.
+{% if pillar.get('network_provider', '').lower() == 'flannel' %} + {% set allocate_node_cidrs = "--allocate-node-cidrs=false" -%} +{% elif pillar['allocate_node_cidrs'] is defined -%} {% set allocate_node_cidrs = "--allocate-node-cidrs=" + pillar['allocate_node_cidrs'] -%} {% endif -%} {% if pillar['terminated_pod_gc_threshold'] is defined -%} @@ -39,7 +43,7 @@ {% set root_ca_file = "--root-ca-file=/srv/kubernetes/ca.crt" -%} {% endif -%} -{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " --allocate-node-cidrs=false" + " " + terminated_pod_gc + " " + cloud_provider + " " + cloud_config + service_account_key + pillar['log_level'] + " " + root_ca_file -%} +{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " " + allocate_node_cidrs + " " + terminated_pod_gc + " " + cloud_provider + " " + cloud_config + service_account_key + pillar['log_level'] + " " + root_ca_file -%} # test_args has to be kept at the end, so they'll overwrite any prior configuration diff --git a/cluster/saltbase/salt/kubelet/default b/cluster/saltbase/salt/kubelet/default index 35242a71693..c642e4acc9f 100644 --- a/cluster/saltbase/salt/kubelet/default +++ b/cluster/saltbase/salt/kubelet/default @@ -85,6 +85,11 @@ {% set configure_cbr0 = "--configure-cbr0=" + pillar['allocate_node_cidrs'] -%} {% endif -%} +{% set experimental_flannel_overlay = "" -%} +{% if pillar.get('network_provider', '').lower() == 'flannel' %} + {% set experimental_flannel_overlay = "--experimental-flannel-overlay=true" %} +{% endif -%} + # Run containers under the root cgroup and create a system container. {% set system_container = "" -%} {% set cgroup_root = "" -%} @@ -117,4 +122,4 @@ {% endif -%} # test_args has to be kept at the end, so they'll overwrite any prior configuration -DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{test_args}}" +DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{experimental_flannel_overlay}} {{test_args}}" diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls index 45294498427..5789f42ed5a 100644 --- a/cluster/saltbase/salt/top.sls +++ b/cluster/saltbase/salt/top.sls @@ -13,7 +13,9 @@ base: 'roles:kubernetes-pool': - match: grain - docker +{% if pillar.get('network_provider', '').lower() == 'flannel' %} - flannel +{% endif %} - helpers - cadvisor - kube-client-tools @@ -41,8 +43,10 @@ base: - match: grain - generate-cert - etcd +{% if pillar.get('network_provider', '').lower() == 'flannel' %} - flannel-server - flannel +{% endif %} - kube-apiserver - kube-controller-manager - kube-scheduler diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 6c2724c5e5c..4cad923808a 100644 --- 
a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -292,8 +292,7 @@ func (s *CMServer) Run(_ []string) error { } if s.AllocateNodeCIDRs { - // TODO: Pipe this as a command line flag that corresponds to overlay==true - if cloud == nil || true { + if cloud == nil { glog.Warning("allocate-node-cidrs is set, but no cloud provider specified. Will not manage routes.") } else if routes, ok := cloud.Routes(); !ok { glog.Warning("allocate-node-cidrs is set, but cloud provider does not support routes. Will not manage routes.") @@ -301,6 +300,8 @@ func (s *CMServer) Run(_ []string) error { routeController := routecontroller.New(routes, kubeClient, s.ClusterName, &s.ClusterCIDR) routeController.Run(s.NodeSyncPeriod) } + } else { + glog.Infof("allocate-node-cidrs set to %v, node controller not creating routes", s.AllocateNodeCIDRs) } resourcequotacontroller.NewResourceQuotaController(kubeClient).Run(s.ResourceQuotaSyncPeriod) diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 63bc35546d7..1196159ce4e 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -68,9 +68,8 @@ import ( ) const ( - defaultRootDir = "/var/lib/kubelet" - networkConfig = "/var/run/flannel/network.json" - useDefaultOverlay = true + defaultRootDir = "/var/lib/kubelet" + experimentalFlannelOverlay = false ) // KubeletServer encapsulates all of the parameters necessary for starting up @@ -158,8 +157,8 @@ type KubeletServer struct { KubeAPIBurst int // Pull images one at a time. - SerializeImagePulls bool - UseDefaultOverlay bool + SerializeImagePulls bool + ExperimentalFlannelOverlay bool } // bootstrapping interface for kubelet, targets the initialization protocol @@ -232,8 +231,7 @@ func NewKubeletServer() *KubeletServer { ReconcileCIDR: true, KubeAPIQPS: 5.0, KubeAPIBurst: 10, - // Flannel parameters - UseDefaultOverlay: useDefaultOverlay, + ExperimentalFlannelOverlay: experimentalFlannelOverlay, } } @@ -348,9 +346,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { fs.Float32Var(&s.KubeAPIQPS, "kube-api-qps", s.KubeAPIQPS, "QPS to use while talking with kubernetes apiserver") fs.IntVar(&s.KubeAPIBurst, "kube-api-burst", s.KubeAPIBurst, "Burst to use while talking with kubernetes apiserver") fs.BoolVar(&s.SerializeImagePulls, "serialize-image-pulls", s.SerializeImagePulls, "Pull images one at a time. We recommend *not* changing the default value on nodes that run docker daemon with version < 1.9 or an Aufs storage backend. Issue #10959 has more details. [default=true]") - - // Flannel config parameters - fs.BoolVar(&s.UseDefaultOverlay, "use-default-overlay", s.UseDefaultOverlay, "Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false]") + fs.BoolVar(&s.ExperimentalFlannelOverlay, "experimental-flannel-overlay", s.ExperimentalFlannelOverlay, "Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. 
[default=false]") } // UnsecuredKubeletConfig returns a KubeletConfig suitable for being run, or an error if the server setup @@ -489,7 +485,7 @@ func (s *KubeletServer) UnsecuredKubeletConfig() (*KubeletConfig, error) { Writer: writer, VolumePlugins: ProbeVolumePlugins(), - UseDefaultOverlay: s.UseDefaultOverlay, + ExperimentalFlannelOverlay: s.ExperimentalFlannelOverlay, }, nil } @@ -962,7 +958,7 @@ type KubeletConfig struct { Writer io.Writer VolumePlugins []volume.VolumePlugin - UseDefaultOverlay bool + ExperimentalFlannelOverlay bool } func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { @@ -1045,8 +1041,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod kc.OOMAdjuster, kc.SerializeImagePulls, kc.ContainerManager, - // Flannel parameters - kc.UseDefaultOverlay, + kc.ExperimentalFlannelOverlay, ) if err != nil { diff --git a/docs/admin/kubelet.md b/docs/admin/kubelet.md index 00f32a89985..4597f35020f 100644 --- a/docs/admin/kubelet.md +++ b/docs/admin/kubelet.md @@ -85,6 +85,7 @@ kubelet --enable-server[=true]: Enable the Kubelet's server --event-burst=10: Maximum size of a bursty event records, temporarily allows event records to burst to this number, while still not exceeding event-qps. Only used if --event-qps > 0 --event-qps=5: If > 0, limit event creations per second to this value. If 0, unlimited. + --experimental-flannel-overlay[=false]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false] --file-check-frequency=20s: Duration between checking config files for new data --google-json-key="": The Google Cloud Platform Service Account JSON Key to use for authentication. --healthz-bind-address=127.0.0.1: The IP address for the healthz server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces) @@ -137,10 +138,9 @@ kubelet --system-container="": Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: ""). --tls-cert-file="": File containing x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory passed to --cert-dir. --tls-private-key-file="": File containing x509 private key matching --tls-cert-file. - --use-default-overlay[=true]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false] ``` -###### Auto generated by spf13/cobra on 23-Nov-2015 +###### Auto generated by spf13/cobra on 24-Nov-2015 diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index 2472f4bc1aa..fc5b2d0990a 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -327,4 +327,4 @@ watch-only whitelist-override-label windows-line-endings www-prefix -use-default-overlay +experimental-flannel-overlay diff --git a/pkg/kubelet/flannel_helper.go b/pkg/kubelet/flannel_helper.go index 91a16e8c369..c81cb594fec 100644 --- a/pkg/kubelet/flannel_helper.go +++ b/pkg/kubelet/flannel_helper.go @@ -32,6 +32,9 @@ import ( // TODO: Move all this to a network plugin. 
const ( + // TODO: The location of default docker options is distro specific, so this + // probably won't work on anything other than debian/ubuntu. This is a + // short-term compromise till we've moved overlay setup into a plugin. dockerOptsFile = "/etc/default/docker" flannelSubnetKey = "FLANNEL_SUBNET" flannelNetworkKey = "FLANNEL_NETWORK" @@ -78,7 +81,7 @@ func (f *FlannelHelper) Handshake() (podCIDR string, err error) { if _, err = os.Stat(f.subnetFile); err != nil { return "", fmt.Errorf("Waiting for subnet file %v", f.subnetFile) } - glog.Infof("(kubelet)Found flannel subnet file %v", f.subnetFile) + glog.Infof("Found flannel subnet file %v", f.subnetFile) config, err := parseKVConfig(f.subnetFile) if err != nil { @@ -115,7 +118,7 @@ func writeDockerOptsFromFlannelConfig(flannelConfig map[string]string) error { } opts, ok := dockerOpts[dockerOptsKey] if !ok { - glog.Errorf("(kubelet)Did not find docker opts, writing them") + glog.Errorf("Did not find docker opts, writing them") opts = fmt.Sprintf( " --bridge=cbr0 --iptables=false --ip-masq=false") } else { @@ -139,7 +142,7 @@ func parseKVConfig(filename string) (map[string]string, error) { return config, err } str := string(buff) - glog.Infof("(kubelet) Read kv options %+v from %v", str, filename) + glog.Infof("Read kv options %+v from %v", str, filename) for _, line := range strings.Split(str, "\n") { kv := strings.Split(line, "=") if len(kv) != 2 { @@ -160,6 +163,6 @@ func writeKVConfig(filename string, kv map[string]string) error { for k, v := range kv { content += fmt.Sprintf("%v=%v\n", k, v) } - glog.Warningf("(kubelet)Writing kv options %+v to %v", content, filename) + glog.Warningf("Writing kv options %+v to %v", content, filename) return ioutil.WriteFile(filename, []byte(content), 0644) } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 8e45b69c159..c8a95a5296c 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -217,7 +217,7 @@ func NewMainKubelet( oomAdjuster *oom.OOMAdjuster, serializeImagePulls bool, containerManager cm.ContainerManager, - useDefaultOverlay bool, + flannelExperimentalOverlay bool, ) (*Kubelet, error) { if rootDirectory == "" { @@ -328,12 +328,19 @@ func NewMainKubelet( cpuCFSQuota: cpuCFSQuota, daemonEndpoints: daemonEndpoints, containerManager: containerManager, + flannelExperimentalOverlay: flannelExperimentalOverlay, flannelHelper: NewFlannelHelper(), - useDefaultOverlay: useDefaultOverlay, + } + if klet.flannelExperimentalOverlay { + glog.Infof("Flannel is in charge of podCIDR and overlay networking.") } if klet.kubeClient == nil { - glog.Infof("Master not setting up flannel overlay") - klet.useDefaultOverlay = false + // The master kubelet cannot wait for the flannel daemon because it is responsible + // for starting up the flannel server in a static pod. So even though the flannel + // daemon runs on the master, it doesn't hold up cluster bootstrap. All the pods + // on the master run with host networking, so the master flannel doesn't care + // even if the network changes. We only need it for the master proxy. + klet.flannelExperimentalOverlay = false } if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}); err != nil { return nil, err @@ -656,8 +663,12 @@ type Kubelet struct { // oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up. 
oneTimeInitializer sync.Once - useDefaultOverlay bool - flannelHelper *FlannelHelper + flannelExperimentalOverlay bool + + // TODO: FlannelHelper doesn't store any state, we can instantiate it + // on the fly if we're confident the dbus connections it opens don't + // put the system under duress. + flannelHelper *FlannelHelper } func (kl *Kubelet) allSourcesReady() bool { @@ -2628,7 +2639,7 @@ var oldNodeUnschedulable bool func (kl *Kubelet) syncNetworkStatus() { var err error if kl.configureCBR0 { - if kl.useDefaultOverlay { + if kl.flannelExperimentalOverlay { podCIDR, err := kl.flannelHelper.Handshake() if err != nil { glog.Infof("Flannel server handshake failed %v", err) @@ -2903,13 +2914,24 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { if node == nil { return fmt.Errorf("no node instance returned for %q", kl.nodeName) } - // TODO: Actually update the node spec with pod cidr, this is currently a no-op. - if kl.useDefaultOverlay { - node.Spec.PodCIDR = kl.runtimeState.podCIDR() + // Flannel is the authoritative source of pod CIDR, if it's running. + // This is a short-term compromise till we get flannel working in + // reservation mode. + if kl.flannelExperimentalOverlay { + flannelPodCIDR := kl.runtimeState.podCIDR() + if node.Spec.PodCIDR != flannelPodCIDR { + node.Spec.PodCIDR = flannelPodCIDR + glog.Infof("Updating podCIDR to %v", node.Spec.PodCIDR) + if updatedNode, err := kl.kubeClient.Nodes().Update(node); err != nil { + glog.Warningf("Failed to update podCIDR: %v", err) + } else { + // Update the node resourceVersion so the status update doesn't fail. + node = updatedNode + } + } } else if kl.reconcileCIDR { kl.runtimeState.setPodCIDR(node.Spec.PodCIDR) } - glog.Infof("Updating node in apiserver with cidr %v", node.Spec.PodCIDR) if err := kl.setNodeStatus(node); err != nil { return err
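
Reviewer note on the handshake in flannel_helper.go: Handshake blocks until flanneld has written its subnet file, then parseKVConfig reads that file as one KEY=VALUE pair per line, and the FLANNEL_SUBNET value becomes the node's pod CIDR. A minimal, self-contained sketch of that parsing step, assuming a subnet file of the shape flanneld writes; the parseKV name and the file path below are illustrative, not the patch's exact code:

package main

import (
	"fmt"
	"io/ioutil"
	"strings"
)

// parseKV mirrors the patch's parseKVConfig: one KEY=VALUE pair per
// line; lines that don't split into exactly two fields are skipped.
func parseKV(filename string) (map[string]string, error) {
	config := map[string]string{}
	buff, err := ioutil.ReadFile(filename)
	if err != nil {
		return config, err
	}
	for _, line := range strings.Split(string(buff), "\n") {
		kv := strings.Split(line, "=")
		if len(kv) != 2 {
			continue
		}
		config[kv[0]] = kv[1]
	}
	return config, nil
}

func main() {
	// Illustrative path; the helper reads its configured f.subnetFile.
	config, err := parseKV("/var/run/flannel/subnet.env")
	if err != nil {
		fmt.Println("still waiting for flannel subnet file:", err)
		return
	}
	// FLANNEL_SUBNET matches the flannelSubnetKey constant above.
	fmt.Println("pod CIDR from flannel:", config["FLANNEL_SUBNET"])
}

Running this while flanneld is still coming up shows the same wait-then-read behavior Handshake relies on: a missing file is reported as an error and retried on the next sync, and once the file exists the subnet is read straight out of it.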
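Reviewer note on the last kubelet.go hunk: it flips ownership of node.Spec.PodCIDR. With --experimental-flannel-overlay on, the subnet flannel handed the kubelet is authoritative and is pushed to the apiserver; otherwise the kubelet keeps adopting the controller-manager-allocated CIDR via reconcileCIDR. A condensed sketch of just that decision, under the assumption that a stub Node stands in for api.Node and a plain string for the kubelet's runtime state; reconcilePodCIDR is a made-up name, not a function in the patch:

package main

import "fmt"

// Node stands in for api.Node; only the field the hunk touches.
type Node struct{ PodCIDR string }

// reconcilePodCIDR condenses the hunk's rule: flannel's CIDR wins when
// the overlay is on (followed in the real code by a Nodes().Update call
// so the later status update doesn't hit a stale resourceVersion);
// otherwise the apiserver's allocation is copied into runtime state.
func reconcilePodCIDR(flannelOverlay, reconcileCIDR bool, flannelCIDR string, node *Node, runtimeCIDR *string) {
	if flannelOverlay {
		if node.PodCIDR != flannelCIDR {
			node.PodCIDR = flannelCIDR
			fmt.Println("updating podCIDR to", flannelCIDR)
		}
	} else if reconcileCIDR {
		*runtimeCIDR = node.PodCIDR
	}
}

func main() {
	node := &Node{PodCIDR: ""}
	var runtimeCIDR string
	// Overlay on: a subnet from flannel's 172.16.0.0/12 pool
	// (cf. network.json above) is pushed onto the node object.
	reconcilePodCIDR(true, true, "172.16.3.0/24", node, &runtimeCIDR)
	fmt.Println("node:", node.PodCIDR, "runtime:", runtimeCIDR)
}

This also explains the controller-manager side of the patch: when flannel owns CIDR allocation, the salt template forces --allocate-node-cidrs=false so the two allocators never fight over the same field.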