From 4cd1ee177bae15e18e4a10e34a3b2b841ac57401 Mon Sep 17 00:00:00 2001 From: Prashanth Balasubramanian Date: Sun, 6 Sep 2015 11:10:33 -0700 Subject: [PATCH 1/5] Salt configuration for flanneld --- cluster/gce/config-default.sh | 1 + cluster/gce/config-test.sh | 3 + cluster/gce/util.sh | 21 ++- cluster/saltbase/salt/etcd/etcd.manifest | 4 +- cluster/saltbase/salt/flannel/init.sls | 41 ++++++ cluster/saltbase/salt/flannel/initd | 126 ++++++++++++++++++ cluster/saltbase/salt/flannel/network.json | 8 ++ cluster/saltbase/salt/top.sls | 5 + .../app/controllermanager.go | 3 +- 9 files changed, 206 insertions(+), 6 deletions(-) create mode 100644 cluster/saltbase/salt/flannel/init.sls create mode 100644 cluster/saltbase/salt/flannel/initd create mode 100644 cluster/saltbase/salt/flannel/network.json diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 8fd2fa8422f..b6bcf8ba272 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -120,3 +120,4 @@ OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" # Optional: if set to true, kube-up will configure the cluster to run e2e tests. E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} +FIREWALL_ETCD="${FIREWALL_SSH:-${NETWORK}-allow-etcd}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 8eca5118d70..78b1cd69fd2 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -132,3 +132,6 @@ OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" # Optional: if set to true, kube-up will configure the cluster to run e2e tests. E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} +# Overlay network settings +OVERLAY_NETWORK=${OVERLAY_NETWORK:-true} +FIREWALL_ETCD="${FIREWALL_SSH:-${NETWORK}-allow-etcd}" diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 655221c9d16..9e45f27fad5 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -304,7 +304,7 @@ function create-static-ip { echo -e "${color_red}Failed to create static ip $1 ${color_norm}" >&2 exit 2 fi - attempt=$(($attempt+1)) + attempt=$(($attempt+1)) echo -e "${color_yellow}Attempt $attempt failed to create static ip $1. Retrying.${color_norm}" >&2 sleep $(($attempt * 5)) else @@ -603,13 +603,28 @@ function kube-up { --allow "tcp:22" & fi - echo "Starting master and configuring firewalls" gcloud compute firewall-rules create "${MASTER_NAME}-https" \ --project "${PROJECT}" \ --network "${NETWORK}" \ --target-tags "${MASTER_TAG}" \ --allow tcp:443 & + if [[ "${OVERLAY_NETWORK}" == "true" ]]; then + # TODO: Where to put this? Scope it to flannel setup. + if ! "${GCLOUD}" compute firewall-rules --project "${PROJECT}" describe "${FIREWALL_ETCD}" &>/dev/null; then + "${GCLOUD}" compute firewall-rules create "${FIREWALL_ETCD}" \ + --network="${NETWORK}" \ + --project="${PROJECT}" \ + --source-ranges="10.0.0.0/8" \ + --target-tags "${MINION_TAG}" \ + --allow tcp:4001 & + else + echo "... Using etcd firewall-rule: ${FIREWALL_ETCD}" >&2 + fi + else + echo "Not opening etcd up to the cluster: ${OVERLAY_NETWORK} ${FIREWALL_ETCD}" + fi + # We have to make sure the disk is created before creating the master VM, so # run this in the foreground. 
gcloud compute disks create "${MASTER_NAME}-pd" \ @@ -672,7 +687,7 @@ function kube-up { write-node-env local template_name="${NODE_INSTANCE_PREFIX}-template" - + create-node-instance-template $template_name gcloud compute instance-groups managed \ diff --git a/cluster/saltbase/salt/etcd/etcd.manifest b/cluster/saltbase/salt/etcd/etcd.manifest index 33c04dab039..b9553ace2c6 100644 --- a/cluster/saltbase/salt/etcd/etcd.manifest +++ b/cluster/saltbase/salt/etcd/etcd.manifest @@ -19,7 +19,7 @@ "command": [ "/bin/sh", "-c", - "/usr/local/bin/etcd --listen-peer-urls http://127.0.0.1:{{ server_port }} --addr 127.0.0.1:{{ port }} --bind-addr 127.0.0.1:{{ port }} --data-dir /var/etcd/data{{ suffix }} 1>>/var/log/etcd{{ suffix }}.log 2>&1" + "/usr/local/bin/etcd --listen-peer-urls http://0.0.0.0:{{ server_port }} --addr 0.0.0.0:{{ port }} --bind-addr 0.0.0.0:{{ port }} --data-dir /var/etcd/data{{ suffix }} 1>>/var/log/etcd{{ suffix }}.log 2>&1" ], "livenessProbe": { "httpGet": { @@ -33,7 +33,7 @@ "ports":[ { "name": "serverport", "containerPort": {{ server_port }}, - "hostPort": {{ server_port }} + "hostPort": {{ server_port }} },{ "name": "clientport", "containerPort": {{ port }}, diff --git a/cluster/saltbase/salt/flannel/init.sls b/cluster/saltbase/salt/flannel/init.sls new file mode 100644 index 00000000000..d774f25aae3 --- /dev/null +++ b/cluster/saltbase/salt/flannel/init.sls @@ -0,0 +1,41 @@ +flannel-tar: + archive: + - extracted + - user: root + - name: /usr/local/src + - makedirs: True + - source: https://github.com/coreos/flannel/releases/download/v0.5.3/flannel-0.5.3-linux-amd64.tar.gz + - tar_options: v + - source_hash: md5=2a82ed82a37d71c85586977f0e475b70 + - archive_format: tar + - if_missing: /usr/local/src/flannel/flannel-0.5.3/ + +flannel-symlink: + file.symlink: + - name: /usr/local/bin/flanneld + - target: /usr/local/src/flannel-0.5.3/flanneld + - force: true + - watch: + - archive: flannel-tar + +/etc/init.d/flannel: + file.managed: + - source: salt://flannel/initd + - user: root + - group: root + - mode: 755 + +/var/run/flannel/network.json: + file.managed: + - source: salt://flannel/network.json + - makedirs: True + - user: root + - group: root + - mode: 755 + +flannel: + service.running: + - enable: True + - watch: + - file: /usr/local/bin/flanneld + - file: /etc/init.d/flannel diff --git a/cluster/saltbase/salt/flannel/initd b/cluster/saltbase/salt/flannel/initd new file mode 100644 index 00000000000..eacaaea5e8a --- /dev/null +++ b/cluster/saltbase/salt/flannel/initd @@ -0,0 +1,126 @@ +#!/bin/bash +# +### BEGIN INIT INFO +# Provides: flanneld +# Required-Start: $local_fs $network $syslog +# Required-Stop: +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Flannel daemon +# Description: +# Flannel daemon. +### END INIT INFO + + +# PATH should only include /usr/* if it runs after the mountnfs.sh script +PATH=/sbin:/usr/sbin:/bin:/usr/bin +DESC="Flannel overlay network daemon" +NAME=flanneld +DAEMON=/usr/local/bin/flanneld +DAEMON_ARGS="-etcd-endpoints http://e2e-test-beeps-master:4001 -etcd-prefix /kubernetes.io/network" +DAEMON_LOG_FILE=/var/log/$NAME.log +PIDFILE=/var/run/$NAME.pid +SCRIPTNAME=/etc/init.d/$NAME +DAEMON_USER=root + +# Exit if the package is not installed +[ -x "$DAEMON" ] || exit 0 + +# Read configuration variable file if it is present +[ -r /etc/default/$NAME ] && . /etc/default/$NAME + +# Define LSB log_* functions. +# Depend on lsb-base (>= 3.2-14) to ensure that this file is present +# and status_of_proc is working. +. 
/lib/lsb/init-functions + +# +# Function that starts the daemon/service +# +do_start() +{ + # Avoid a potential race at boot time when both monit and init.d start + # the same service + PIDS=$(pidof $DAEMON) + for PID in ${PIDS}; do + kill -9 $PID + done + + # Return + # 0 if daemon has been started + # 1 if daemon was already running + # 2 if daemon could not be started + start-stop-daemon --start --quiet --background --no-close \ + --make-pidfile --pidfile $PIDFILE \ + --exec $DAEMON -c $DAEMON_USER --test > /dev/null \ + || return 1 + start-stop-daemon --start --quiet --background --no-close \ + --make-pidfile --pidfile $PIDFILE \ + --exec $DAEMON -c $DAEMON_USER -- \ + $DAEMON_ARGS >> $DAEMON_LOG_FILE 2>&1 \ + || return 2 +} + +# +# Function that stops the daemon/service +# +do_stop() +{ + # Return + # 0 if daemon has been stopped + # 1 if daemon was already stopped + # 2 if daemon could not be stopped + # other if a failure occurred + start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME + RETVAL="$?" + [ "$RETVAL" = 2 ] && return 2 + # Many daemons don't delete their pidfiles when they exit. + rm -f $PIDFILE + return "$RETVAL" +} + + +case "$1" in + start) + log_daemon_msg "Starting $DESC" "$NAME" + do_start + case "$?" in + 0|1) log_end_msg 0 || exit 0 ;; + 2) log_end_msg 1 || exit 1 ;; + esac + ;; + stop) + log_daemon_msg "Stopping $DESC" "$NAME" + do_stop + case "$?" in + 0|1) log_end_msg 0 ;; + 2) exit 1 ;; + esac + ;; + status) + status_of_proc -p $PIDFILE "$DAEMON" "$NAME" && exit 0 || exit $? + ;; + + restart|force-reload) + log_daemon_msg "Restarting $DESC" "$NAME" + do_stop + case "$?" in + 0|1) + do_start + case "$?" in + 0) log_end_msg 0 ;; + 1) log_end_msg 1 ;; # Old process is still running + *) log_end_msg 1 ;; # Failed to start + esac + ;; + *) + # Failed to stop + log_end_msg 1 + ;; + esac + ;; + *) + echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 + exit 3 + ;; +esac diff --git a/cluster/saltbase/salt/flannel/network.json b/cluster/saltbase/salt/flannel/network.json new file mode 100644 index 00000000000..93c980e6922 --- /dev/null +++ b/cluster/saltbase/salt/flannel/network.json @@ -0,0 +1,8 @@ +{ + "Network": "10.245.0.0/16", + "SubnetLen": 24, + "Backend": { + "Type": "vxlan", + "VNI": 1 + } +} diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls index c80c527e372..387e177efc6 100644 --- a/cluster/saltbase/salt/top.sls +++ b/cluster/saltbase/salt/top.sls @@ -13,6 +13,10 @@ base: 'roles:kubernetes-pool': - match: grain - docker + - flannel +{% if grains['cloud'] is defined and grains['cloud'] == 'azure' %} + - openvpn-client +{% endif %} - helpers - cadvisor - kube-client-tools @@ -40,6 +44,7 @@ base: - match: grain - generate-cert - etcd + - flannel - kube-apiserver - kube-controller-manager - kube-scheduler diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 448b7ed0d1f..6c2724c5e5c 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -292,7 +292,8 @@ func (s *CMServer) Run(_ []string) error { } if s.AllocateNodeCIDRs { - if cloud == nil { + // TODO: Pipe this as a command line flag that corresponds to overlay==true + if cloud == nil || true { glog.Warning("allocate-node-cidrs is set, but no cloud provider specified. 
Will not manage routes.") } else if routes, ok := cloud.Routes(); !ok { glog.Warning("allocate-node-cidrs is set, but cloud provider does not support routes. Will not manage routes.") From 7aa8ebe30f90d931b1e0708910f3a94fbbc7c5c9 Mon Sep 17 00:00:00 2001 From: Prashanth Balasubramanian Date: Fri, 20 Nov 2015 19:41:32 -0800 Subject: [PATCH 2/5] Flannel handshakes with kubelet. --- cluster/saltbase/salt/flannel/init.sls | 8 +- .../kube-controller-manager.manifest | 2 +- cmd/kubelet/app/server.go | 28 +++- pkg/kubelet/flannel_server.go | 132 ++++++++++++++++++ pkg/kubelet/kubelet.go | 37 ++++- 5 files changed, 197 insertions(+), 10 deletions(-) create mode 100644 pkg/kubelet/flannel_server.go diff --git a/cluster/saltbase/salt/flannel/init.sls b/cluster/saltbase/salt/flannel/init.sls index d774f25aae3..9e8d579dd92 100644 --- a/cluster/saltbase/salt/flannel/init.sls +++ b/cluster/saltbase/salt/flannel/init.sls @@ -4,16 +4,16 @@ flannel-tar: - user: root - name: /usr/local/src - makedirs: True - - source: https://github.com/coreos/flannel/releases/download/v0.5.3/flannel-0.5.3-linux-amd64.tar.gz + - source: https://github.com/coreos/flannel/releases/download/v0.5.5/flannel-0.5.5-linux-amd64.tar.gz - tar_options: v - - source_hash: md5=2a82ed82a37d71c85586977f0e475b70 + - source_hash: md5=972c717254775bef528f040af804f2cc - archive_format: tar - - if_missing: /usr/local/src/flannel/flannel-0.5.3/ + - if_missing: /usr/local/src/flannel/flannel-0.5.5/ flannel-symlink: file.symlink: - name: /usr/local/bin/flanneld - - target: /usr/local/src/flannel-0.5.3/flanneld + - target: /usr/local/src/flannel-0.5.5/flanneld - force: true - watch: - archive: flannel-tar diff --git a/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest b/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest index 23af5ebad58..cc68b0595e3 100644 --- a/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest +++ b/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest @@ -39,7 +39,7 @@ {% set root_ca_file = "--root-ca-file=/srv/kubernetes/ca.crt" -%} {% endif -%} -{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " " + allocate_node_cidrs + " " + terminated_pod_gc + " " + cloud_provider + " " + cloud_config + service_account_key + pillar['log_level'] + " " + root_ca_file -%} +{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " --allocate-node-cidrs=false" + " " + terminated_pod_gc + " " + cloud_provider + " " + cloud_config + service_account_key + pillar['log_level'] + " " + root_ca_file -%} # test_args has to be kept at the end, so they'll overwrite any prior configuration diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 64845f16581..2bc3e34cf6c 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -67,7 +67,11 @@ import ( "k8s.io/kubernetes/pkg/cloudprovider" ) -const defaultRootDir = "/var/lib/kubelet" +const ( + defaultRootDir = "/var/lib/kubelet" + networkConfig = "/var/run/flannel/network.json" + useDefaultOverlay = true +) // KubeletServer encapsulates all of the parameters necessary for starting up // a kubelet. These can either be set via command line or directly. @@ -155,6 +159,10 @@ type KubeletServer struct { // Pull images one at a time. 
SerializeImagePulls bool + + // Flannel config parameters + UseDefaultOverlay bool + NetworkConfig string } // bootstrapping interface for kubelet, targets the initialization protocol @@ -227,6 +235,9 @@ func NewKubeletServer() *KubeletServer { ReconcileCIDR: true, KubeAPIQPS: 5.0, KubeAPIBurst: 10, + // Flannel parameters + UseDefaultOverlay: useDefaultOverlay, + // NetworkConfig: networkConfig, } } @@ -341,6 +352,10 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { fs.Float32Var(&s.KubeAPIQPS, "kube-api-qps", s.KubeAPIQPS, "QPS to use while talking with kubernetes apiserver") fs.IntVar(&s.KubeAPIBurst, "kube-api-burst", s.KubeAPIBurst, "Burst to use while talking with kubernetes apiserver") fs.BoolVar(&s.SerializeImagePulls, "serialize-image-pulls", s.SerializeImagePulls, "Pull images one at a time. We recommend *not* changing the default value on nodes that run docker daemon with version < 1.9 or an Aufs storage backend. Issue #10959 has more details. [default=true]") + + // Flannel config parameters + fs.BoolVar(&s.UseDefaultOverlay, "use-default-overlay", s.UseDefaultOverlay, "Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false]") + fs.StringVar(&s.NetworkConfig, "network-config", s.NetworkConfig, "Absolute path to a network json file, as accepted by flannel.") } // UnsecuredKubeletConfig returns a KubeletConfig suitable for being run, or an error if the server setup @@ -478,6 +493,10 @@ func (s *KubeletServer) UnsecuredKubeletConfig() (*KubeletConfig, error) { TLSOptions: tlsOptions, Writer: writer, VolumePlugins: ProbeVolumePlugins(), + + // Flannel options + UseDefaultOverlay: s.UseDefaultOverlay, + NetworkConfig: s.NetworkConfig, }, nil } @@ -949,6 +968,10 @@ type KubeletConfig struct { TLSOptions *kubelet.TLSOptions Writer io.Writer VolumePlugins []volume.VolumePlugin + + // Flannel parameters + UseDefaultOverlay bool + NetworkConfig string } func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { @@ -1031,6 +1054,9 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod kc.OOMAdjuster, kc.SerializeImagePulls, kc.ContainerManager, + // Flannel parameters + kc.UseDefaultOverlay, + //kc.NetworkConfig, ) if err != nil { diff --git a/pkg/kubelet/flannel_server.go b/pkg/kubelet/flannel_server.go new file mode 100644 index 00000000000..fcfdea1a43f --- /dev/null +++ b/pkg/kubelet/flannel_server.go @@ -0,0 +1,132 @@ +package kubelet + +import ( + "fmt" + "io/ioutil" + "os" + "os/exec" + "strconv" + "strings" + + "github.com/golang/glog" +) + +const ( + networkType = "vxlan" + dockerOptsFile = "/etc/default/docker" + flannelSubnetKey = "FLANNEL_SUBNET" + flannelNetworkKey = "FLANNEL_NETWORK" + flannelMtuKey = "FLANNEL_MTU" + dockerOptsKey = "DOCKER_OPTS" + flannelSubnetFile = "/var/run/flannel/subnet.env" +) + +type FlannelServer struct { + subnetFile string + // TODO: Manage subnet file. +} + +func NewFlannelServer() *FlannelServer { + return &FlannelServer{flannelSubnetFile} +} + +func (f *FlannelServer) Handshake() (podCIDR string, err error) { + // Flannel daemon will hang till the server comes up, kubelet will hang until + // flannel daemon has written subnet env variables. This is the kubelet handshake. + // To improve performance, we could defer just the configuration of the container + // bridge till after subnet.env is written. Keeping it local is clearer for now. 
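+	// The subnet file is a small env-style file written by flanneld; as an
+	// illustration only (example values, not a real allocation), it looks like:
+	//   FLANNEL_NETWORK=10.245.0.0/16
+	//   FLANNEL_SUBNET=10.245.12.1/24
+	//   FLANNEL_MTU=1450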
+ // TODO: Using a file to communicate is brittle + if _, err = os.Stat(f.subnetFile); err != nil { + return "", fmt.Errorf("Waiting for subnet file %v", f.subnetFile) + } + glog.Infof("(kubelet)Found flannel subnet file %v", f.subnetFile) + + // TODO: Rest of this function is a hack. + config, err := parseKVConfig(f.subnetFile) + if err != nil { + return "", err + } + if err = writeDockerOptsFromFlannelConfig(config); err != nil { + return "", err + } + podCIDR, ok := config[flannelSubnetKey] + if !ok { + return "", fmt.Errorf("No flannel subnet, config %+v", config) + } + kubeNetwork, ok := config[flannelNetworkKey] + if !ok { + return "", fmt.Errorf("No flannel network, config %+v", config) + } + if err := exec.Command("iptables", + "-t", "nat", + "-A", "POSTROUTING", + "!", "-d", kubeNetwork, + "-s", podCIDR, + "-j", "MASQUERADE").Run(); err != nil { + return "", fmt.Errorf("Unable to install iptables rule for flannel.") + } + return podCIDR, nil +} + +// Take env variables from flannel subnet env and write to /etc/docker/defaults. +func writeDockerOptsFromFlannelConfig(flannelConfig map[string]string) error { + // TODO: Write dockeropts to unit file on systemd machines + // https://github.com/docker/docker/issues/9889 + mtu, ok := flannelConfig[flannelMtuKey] + if !ok { + return fmt.Errorf("No flannel mtu, flannel config %+v", flannelConfig) + } + dockerOpts, err := parseKVConfig(dockerOptsFile) + if err != nil { + return err + } + opts, ok := dockerOpts[dockerOptsKey] + if !ok { + glog.Errorf("(kubelet)Did not find docker opts, writing them") + opts = fmt.Sprintf( + " --bridge=cbr0 --iptables=false --ip-masq=false") + } else { + opts, _ = strconv.Unquote(opts) + } + dockerOpts[dockerOptsKey] = fmt.Sprintf("\"%v --mtu=%v\"", opts, mtu) + if err = writeKVConfig(dockerOptsFile, dockerOpts); err != nil { + return err + } + return nil +} + +// parseKVConfig takes a file with key-value env variables and returns a dictionary mapping the same. +func parseKVConfig(filename string) (map[string]string, error) { + config := map[string]string{} + if _, err := os.Stat(filename); err != nil { + return config, err + } + buff, err := ioutil.ReadFile(filename) + if err != nil { + return config, err + } + str := string(buff) + glog.Infof("(kubelet) Read kv options %+v from %v", str, filename) + for _, line := range strings.Split(str, "\n") { + kv := strings.Split(line, "=") + if len(kv) != 2 { + glog.Warningf("Ignoring non key-value pair %v", kv) + continue + } + config[string(kv[0])] = string(kv[1]) + } + return config, nil +} + +// writeKVConfig writes a kv map as env variables into the given file. 
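+// Note that the target file must already exist (see the os.Stat guard below);
+// its contents are replaced wholesale, so callers are expected to
+// read-modify-write, as writeDockerOptsFromFlannelConfig does above.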
+func writeKVConfig(filename string, kv map[string]string) error { + if _, err := os.Stat(filename); err != nil { + return err + } + content := "" + for k, v := range kv { + content += fmt.Sprintf("%v=%v\n", k, v) + } + glog.Warningf("(kubelet)Writing kv options %+v to %v", content, filename) + return ioutil.WriteFile(filename, []byte(content), 0644) +} diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index a1a5ca6fe1d..575987a5841 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -217,6 +217,7 @@ func NewMainKubelet( oomAdjuster *oom.OOMAdjuster, serializeImagePulls bool, containerManager cm.ContainerManager, + useDefaultOverlay bool, ) (*Kubelet, error) { if rootDirectory == "" { @@ -327,8 +328,16 @@ func NewMainKubelet( cpuCFSQuota: cpuCFSQuota, daemonEndpoints: daemonEndpoints, containerManager: containerManager, - } + // Flannel options + // TODO: This is currently a dummy server. + flannelServer: NewFlannelServer(), + useDefaultOverlay: useDefaultOverlay, + } + if klet.kubeClient == nil { + glog.Infof("Master not setting up flannel overlay") + klet.useDefaultOverlay = false + } if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}); err != nil { return nil, err } else { @@ -649,6 +658,10 @@ type Kubelet struct { // oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up. oneTimeInitializer sync.Once + + // Flannel options. + useDefaultOverlay bool + flannelServer *FlannelServer } func (kl *Kubelet) allSourcesReady() bool { @@ -1116,6 +1129,7 @@ func (kl *Kubelet) syncNodeStatus() { } if kl.registerNode { // This will exit immediately if it doesn't need to do anything. + glog.Infof("(kubelet) registering node with apiserver") kl.registerWithApiserver() } if err := kl.updateNodeStatus(); err != nil { @@ -2574,10 +2588,10 @@ func (kl *Kubelet) updateRuntimeUp() { func (kl *Kubelet) reconcileCBR0(podCIDR string) error { if podCIDR == "" { - glog.V(5).Info("PodCIDR not set. Will not configure cbr0.") + glog.V(1).Info("(kubelet) PodCIDR not set. Will not configure cbr0.") return nil } - glog.V(5).Infof("PodCIDR is set to %q", podCIDR) + glog.V(1).Infof("(kubelet) PodCIDR is set to %q", podCIDR) _, cidr, err := net.ParseCIDR(podCIDR) if err != nil { return err @@ -2619,6 +2633,17 @@ var oldNodeUnschedulable bool func (kl *Kubelet) syncNetworkStatus() { var err error if kl.configureCBR0 { + if kl.useDefaultOverlay { + glog.Infof("(kubelet) handshaking") + podCIDR, err := kl.flannelServer.Handshake() + if err != nil { + glog.Infof("Flannel server handshake failed %v", err) + return + } + glog.Infof("(kubelet) setting cidr, currently: %v -> %v", + kl.runtimeState.podCIDR(), podCIDR) + kl.runtimeState.setPodCIDR(podCIDR) + } if err := ensureIPTablesMasqRule(); err != nil { err = fmt.Errorf("Error on adding ip table rules: %v", err) glog.Error(err) @@ -2884,9 +2909,13 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { if node == nil { return fmt.Errorf("no node instance returned for %q", kl.nodeName) } - if kl.reconcileCIDR { + // TODO: Actually update the node spec with pod cidr, this is currently a no-op. 
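+	// When the overlay is in use the kubelet, via flannel, is the source of
+	// truth for the pod CIDR, so push the locally discovered value up to the
+	// apiserver instead of reconciling it back down from the node object.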
+ if kl.useDefaultOverlay { + node.Spec.PodCIDR = kl.runtimeState.podCIDR() + } else if kl.reconcileCIDR { kl.runtimeState.setPodCIDR(node.Spec.PodCIDR) } + glog.Infof("(kubelet) updating node in apiserver with cidr %v", node.Spec.PodCIDR) if err := kl.setNodeStatus(node); err != nil { return err From 321bc732646768be4f6cdbc43720f809c7d744c9 Mon Sep 17 00:00:00 2001 From: Prashanth Balasubramanian Date: Sat, 21 Nov 2015 17:37:32 -0800 Subject: [PATCH 3/5] Flannel server in static pod with private etcd. --- cluster/saltbase/salt/etcd/etcd.manifest | 4 +- .../flannel-server/flannel-server.manifest | 99 +++++++++++++++++++ cluster/saltbase/salt/flannel-server/init.sls | 24 +++++ .../saltbase/salt/flannel-server/network.json | 8 ++ cluster/saltbase/salt/flannel/default | 6 ++ cluster/saltbase/salt/flannel/init.sls | 17 ++-- cluster/saltbase/salt/flannel/initd | 4 +- cluster/saltbase/salt/flannel/network.json | 2 +- cluster/saltbase/salt/top.sls | 1 + docs/proposals/flannel-integration.md | 35 +++++++ pkg/kubelet/flannel_server.go | 57 +++++++---- pkg/kubelet/kubelet.go | 6 +- pkg/master/ports/ports.go | 2 + 13 files changed, 229 insertions(+), 36 deletions(-) create mode 100644 cluster/saltbase/salt/flannel-server/flannel-server.manifest create mode 100644 cluster/saltbase/salt/flannel-server/init.sls create mode 100644 cluster/saltbase/salt/flannel-server/network.json create mode 100644 cluster/saltbase/salt/flannel/default create mode 100644 docs/proposals/flannel-integration.md diff --git a/cluster/saltbase/salt/etcd/etcd.manifest b/cluster/saltbase/salt/etcd/etcd.manifest index b9553ace2c6..33c04dab039 100644 --- a/cluster/saltbase/salt/etcd/etcd.manifest +++ b/cluster/saltbase/salt/etcd/etcd.manifest @@ -19,7 +19,7 @@ "command": [ "/bin/sh", "-c", - "/usr/local/bin/etcd --listen-peer-urls http://0.0.0.0:{{ server_port }} --addr 0.0.0.0:{{ port }} --bind-addr 0.0.0.0:{{ port }} --data-dir /var/etcd/data{{ suffix }} 1>>/var/log/etcd{{ suffix }}.log 2>&1" + "/usr/local/bin/etcd --listen-peer-urls http://127.0.0.1:{{ server_port }} --addr 127.0.0.1:{{ port }} --bind-addr 127.0.0.1:{{ port }} --data-dir /var/etcd/data{{ suffix }} 1>>/var/log/etcd{{ suffix }}.log 2>&1" ], "livenessProbe": { "httpGet": { @@ -33,7 +33,7 @@ "ports":[ { "name": "serverport", "containerPort": {{ server_port }}, - "hostPort": {{ server_port }} + "hostPort": {{ server_port }} },{ "name": "clientport", "containerPort": {{ port }}, diff --git a/cluster/saltbase/salt/flannel-server/flannel-server.manifest b/cluster/saltbase/salt/flannel-server/flannel-server.manifest new file mode 100644 index 00000000000..a0811ec0837 --- /dev/null +++ b/cluster/saltbase/salt/flannel-server/flannel-server.manifest @@ -0,0 +1,99 @@ +{ + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "name": "flannel-helper", + "namespace": "kube-system", + "labels": { + "app": "flannel-helper", + "version": "v0.1" + } + }, + "spec": { + "volumes": [ + { + "name": "varlogflannel", + "hostPath": { + "path": "/var/log" + } + }, + { + "name": "etcdstorage", + "emptyDir": {} + }, + { + "name": "networkconfig", + "hostPath": { + "path": "/etc/kubernetes/network.json" + } + } + ], + "containers": [ + { + "name": "flannel-helper", + "image": "bprashanth/flannel-helper:0.1", + "args": [ + "--network-config=/etc/kubernetes/network.json", + "--etcd-prefix=/kubernetes.io/network", + "--etcd-server=http://127.0.0.1:4001" + ], + "volumeMounts": [ + { + "name": "networkconfig", + "mountPath": "/etc/kubernetes/network.json" + } + ], + "imagePullPolicy": "Always" 
+ }, + { + "name": "flannel-container", + "image": "quay.io/coreos/flannel:0.5.5", + "command": [ + "/bin/sh", + "-c", + "/opt/bin/flanneld -listen 0.0.0.0:10253 -etcd-endpoints http://127.0.0.1:4001 -etcd-prefix /kubernetes.io/network 1>>/var/log/flannel_server.log 2>&1" + ], + "ports": [ + { + "hostPort": 10253, + "containerPort": 10253 + } + ], + "resources": { + "limits": { + "cpu": "100m" + } + }, + "volumeMounts": [ + { + "name": "varlogflannel", + "mountPath": "/var/log" + } + ] + }, + { + "name": "etcd-container", + "image": "gcr.io/google_containers/etcd:2.2.1", + "command": [ + "/bin/sh", + "-c", + "/opt/bin/etcd --listen-peer-urls http://127.0.0.1:4001 --addr http://127.0.0.1:4001 --bind-addr 127.0.0.1:4001 --data-dir /var/etcd/data 1>>/var/log/etcd_flannel.log 2>&1" + ], + "resources": { + "limits": { + "cpu": "100m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "etcdstorage", + "mountPath": "/var/etcd/data" + } + ] + } + ], + "hostNetwork": true + } +} + diff --git a/cluster/saltbase/salt/flannel-server/init.sls b/cluster/saltbase/salt/flannel-server/init.sls new file mode 100644 index 00000000000..154d943adb8 --- /dev/null +++ b/cluster/saltbase/salt/flannel-server/init.sls @@ -0,0 +1,24 @@ +touch /var/log/flannel.log: + cmd.run: + - creates: /var/log/flannel.log + +touch /var/log/etcd_flannel.log: + cmd.run: + - creates: /var/log/etcd_flannel.log + +/etc/kubernetes/network.json: + file.managed: + - source: salt://flannel/network.json + - makedirs: True + - user: root + - group: root + - mode: 755 + +/etc/kubernetes/manifests/flannel-server.manifest: + file.managed: + - source: salt://flannel-server/flannel-server.manifest + - user: root + - group: root + - mode: 644 + - makedirs: true + - dir_mode: 755 diff --git a/cluster/saltbase/salt/flannel-server/network.json b/cluster/saltbase/salt/flannel-server/network.json new file mode 100644 index 00000000000..2d199ea86e1 --- /dev/null +++ b/cluster/saltbase/salt/flannel-server/network.json @@ -0,0 +1,8 @@ +{ + "Network": "192.168.0.0/16", + "SubnetLen": 26, + "Backend": { + "Type": "vxlan", + "VNI": 1 + } +} diff --git a/cluster/saltbase/salt/flannel/default b/cluster/saltbase/salt/flannel/default new file mode 100644 index 00000000000..a4940061070 --- /dev/null +++ b/cluster/saltbase/salt/flannel/default @@ -0,0 +1,6 @@ +{% if grains.api_servers is defined -%} + {% set daemon_args = "-remote " + grains.api_servers + ":10253" -%} +{% else -%} + {% set daemon_args = "-remote 127.0.0.1:10253" -%} +{% endif -%} +DAEMON_ARGS="{{daemon_args}}" diff --git a/cluster/saltbase/salt/flannel/init.sls b/cluster/saltbase/salt/flannel/init.sls index 9e8d579dd92..8f36c953d3a 100644 --- a/cluster/saltbase/salt/flannel/init.sls +++ b/cluster/saltbase/salt/flannel/init.sls @@ -18,6 +18,14 @@ flannel-symlink: - watch: - archive: flannel-tar +/etc/default/flannel: + file.managed: + - source: salt://flannel/default + - template: jinja + - user: root + - group: root + - mode: 644 + /etc/init.d/flannel: file.managed: - source: salt://flannel/initd @@ -25,17 +33,10 @@ flannel-symlink: - group: root - mode: 755 -/var/run/flannel/network.json: - file.managed: - - source: salt://flannel/network.json - - makedirs: True - - user: root - - group: root - - mode: 755 - flannel: service.running: - enable: True - watch: - file: /usr/local/bin/flanneld - file: /etc/init.d/flannel + - file: /etc/default/flannel diff --git a/cluster/saltbase/salt/flannel/initd b/cluster/saltbase/salt/flannel/initd index eacaaea5e8a..3e3a98eaa76 100644 --- 
a/cluster/saltbase/salt/flannel/initd +++ b/cluster/saltbase/salt/flannel/initd @@ -15,9 +15,9 @@ # PATH should only include /usr/* if it runs after the mountnfs.sh script PATH=/sbin:/usr/sbin:/bin:/usr/bin DESC="Flannel overlay network daemon" -NAME=flanneld +NAME=flannel DAEMON=/usr/local/bin/flanneld -DAEMON_ARGS="-etcd-endpoints http://e2e-test-beeps-master:4001 -etcd-prefix /kubernetes.io/network" +DAEMON_ARGS="" DAEMON_LOG_FILE=/var/log/$NAME.log PIDFILE=/var/run/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME diff --git a/cluster/saltbase/salt/flannel/network.json b/cluster/saltbase/salt/flannel/network.json index 93c980e6922..c8d8e0788b8 100644 --- a/cluster/saltbase/salt/flannel/network.json +++ b/cluster/saltbase/salt/flannel/network.json @@ -1,5 +1,5 @@ { - "Network": "10.245.0.0/16", + "Network": "18.16.0.0/16", "SubnetLen": 24, "Backend": { "Type": "vxlan", diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls index 387e177efc6..5c40a36e44a 100644 --- a/cluster/saltbase/salt/top.sls +++ b/cluster/saltbase/salt/top.sls @@ -44,6 +44,7 @@ base: - match: grain - generate-cert - etcd + - flannel-server - flannel - kube-apiserver - kube-controller-manager diff --git a/docs/proposals/flannel-integration.md b/docs/proposals/flannel-integration.md new file mode 100644 index 00000000000..5f33ec3076d --- /dev/null +++ b/docs/proposals/flannel-integration.md @@ -0,0 +1,35 @@ +# Flannel integration with Kubernetes + +## Why? + +* Networking works out of the box. +* Cloud gateway configuration is regulated. +* Consistent bare metal and cloud experience. +* Lays foundation for integrating with networking backends and vendors. + +# How? + +``` +Master Node1 +---------------------|-------------------------------- +database | + | | +{10.250.0.0/16} | docker + | here's podcidr |restart with podcidr +apiserver <------------------- kubelet + | | |here's podcidr +flannel-server:10253 <------- flannel-daemon + --/16---> + <--watch-- [config iptables] + subscribe to new node subnets + --------> [config VXLan] + | +``` + +There is a tiny lie in the above diagram, as of now, the flannel server on the master maintains a private etcd. This will not be necessary once we have a generalized network resource, and a Kubernetes x flannel backend. + +# Limitations + +* Integration is experimental + +# Wishlist diff --git a/pkg/kubelet/flannel_server.go b/pkg/kubelet/flannel_server.go index fcfdea1a43f..d485a84c7e2 100644 --- a/pkg/kubelet/flannel_server.go +++ b/pkg/kubelet/flannel_server.go @@ -4,15 +4,18 @@ import ( "fmt" "io/ioutil" "os" - "os/exec" "strconv" "strings" + utildbus "k8s.io/kubernetes/pkg/util/dbus" + utilexec "k8s.io/kubernetes/pkg/util/exec" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" + "github.com/golang/glog" ) +// TODO: Move all this to a network plugin. const ( - networkType = "vxlan" dockerOptsFile = "/etc/default/docker" flannelSubnetKey = "FLANNEL_SUBNET" flannelNetworkKey = "FLANNEL_NETWORK" @@ -21,27 +24,46 @@ const ( flannelSubnetFile = "/var/run/flannel/subnet.env" ) -type FlannelServer struct { - subnetFile string - // TODO: Manage subnet file. +// A Kubelet to flannel bridging helper. +type FlannelHelper struct { + subnetFile string + iptablesHelper utiliptables.Interface } -func NewFlannelServer() *FlannelServer { - return &FlannelServer{flannelSubnetFile} +// NewFlannelHelper creates a new flannel helper. 
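+// The iptables calls are delegated to pkg/util/iptables, whose EnsureRule
+// only appends the MASQUERADE rule below if it is not already present.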
+func NewFlannelHelper() *FlannelHelper {
+	return &FlannelHelper{
+		subnetFile:     flannelSubnetFile,
+		iptablesHelper: utiliptables.New(utilexec.New(), utildbus.New(), utiliptables.ProtocolIpv4),
+	}
 }
 
-func (f *FlannelServer) Handshake() (podCIDR string, err error) {
-	// Flannel daemon will hang till the server comes up, kubelet will hang until
-	// flannel daemon has written subnet env variables. This is the kubelet handshake.
-	// To improve performance, we could defer just the configuration of the container
-	// bridge till after subnet.env is written. Keeping it local is clearer for now.
+// Ensure the required MASQUERADE rules exist for the given network/cidr.
+func (f *FlannelHelper) ensureFlannelMasqRule(kubeNetwork, podCIDR string) error {
+	// TODO: Investigate delegation to flannel via -ip-masq=true once flannel
+	// issue #374 is resolved.
+	comment := "Flannel masquerade facilitates pod<->node traffic."
+	args := []string{
+		"-m", "comment", "--comment", comment,
+		"!", "-d", kubeNetwork, "-s", podCIDR, "-j", "MASQUERADE",
+	}
+	_, err := f.iptablesHelper.EnsureRule(
+		utiliptables.Append,
+		utiliptables.TableNAT,
+		utiliptables.ChainPostrouting,
+		args...)
+	return err
+}
+
+// Handshake waits for the flannel subnet file and installs a few IPTables
+// rules, returning the pod CIDR allocated for this node.
+func (f *FlannelHelper) Handshake() (podCIDR string, err error) {
 	// TODO: Using a file to communicate is brittle
 	if _, err = os.Stat(f.subnetFile); err != nil {
 		return "", fmt.Errorf("Waiting for subnet file %v", f.subnetFile)
 	}
 	glog.Infof("(kubelet)Found flannel subnet file %v", f.subnetFile)
 
-	// TODO: Rest of this function is a hack.
 	config, err := parseKVConfig(f.subnetFile)
 	if err != nil {
 		return "", err
 	}
@@ -57,13 +79,8 @@ func (f *FlannelServer) Handshake() (podCIDR string, err error) {
 	if !ok {
 		return "", fmt.Errorf("No flannel network, config %+v", config)
 	}
-	if err := exec.Command("iptables",
-		"-t", "nat",
-		"-A", "POSTROUTING",
-		"!", "-d", kubeNetwork,
-		"-s", podCIDR,
-		"-j", "MASQUERADE").Run(); err != nil {
-		return "", fmt.Errorf("Unable to install iptables rule for flannel.")
+	if err := f.ensureFlannelMasqRule(kubeNetwork, podCIDR); err != nil {
+		return "", fmt.Errorf("Unable to install flannel masquerade rule: %v", err)
 	}
 	return podCIDR, nil
 }
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 575987a5841..349103a13f8 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -331,7 +331,7 @@ func NewMainKubelet(
 
 		// Flannel options
 		// TODO: This is currently a dummy server.
-		flannelServer:     NewFlannelServer(),
+		flannelHelper:     NewFlannelHelper(),
 		useDefaultOverlay: useDefaultOverlay,
 	}
 	if klet.kubeClient == nil {
@@ -661,7 +661,7 @@ type Kubelet struct {
 
 	// Flannel options.
 	useDefaultOverlay bool
-	flannelServer     *FlannelServer
+	flannelHelper     *FlannelHelper
 }
 
 func (kl *Kubelet) allSourcesReady() bool {
@@ -2635,7 +2635,7 @@ func (kl *Kubelet) syncNetworkStatus() {
 	if kl.configureCBR0 {
 		if kl.useDefaultOverlay {
 			glog.Infof("(kubelet) handshaking")
-			podCIDR, err := kl.flannelServer.Handshake()
+			podCIDR, err := kl.flannelHelper.Handshake()
 			if err != nil {
 				glog.Infof("Flannel server handshake failed %v", err)
 				return
diff --git a/pkg/master/ports/ports.go b/pkg/master/ports/ports.go
index 8914fe19775..bbe702e2d37 100644
--- a/pkg/master/ports/ports.go
+++ b/pkg/master/ports/ports.go
@@ -32,6 +32,8 @@ const (
 	// ControllerManagerPort is the default port for the controller manager status server.
 	// May be overridden by a flag at startup.
ControllerManagerPort = 10252 + // Port for flannel daemon. + FlannelDaemonPort = 10253 // KubeletReadOnlyPort exposes basic read-only services from the kubelet. // May be overridden by a flag at startup. // This is necessary for heapster to collect monitoring stats from the kubelet From ad2d3d4c205b679e7f87cb7ae6d7d5ee737558b7 Mon Sep 17 00:00:00 2001 From: Prashanth Balasubramanian Date: Sun, 22 Nov 2015 16:06:04 -0800 Subject: [PATCH 4/5] Docs etc --- cluster/gce/config-default.sh | 1 - cluster/gce/config-test.sh | 1 - cluster/gce/util.sh | 21 +-- cluster/saltbase/salt/flannel-server/init.sls | 2 +- cluster/saltbase/salt/flannel/network.json | 8 - cluster/saltbase/salt/top.sls | 3 - cmd/kubelet/app/server.go | 12 +- docs/admin/kubelet.md | 3 +- docs/proposals/flannel-integration.md | 164 ++++++++++++++++-- hack/verify-flags/known-flags.txt | 1 + .../{flannel_server.go => flannel_helper.go} | 16 ++ pkg/kubelet/kubelet.go | 18 +- 12 files changed, 177 insertions(+), 73 deletions(-) delete mode 100644 cluster/saltbase/salt/flannel/network.json rename pkg/kubelet/{flannel_server.go => flannel_helper.go} (88%) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index b6bcf8ba272..8fd2fa8422f 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -120,4 +120,3 @@ OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" # Optional: if set to true, kube-up will configure the cluster to run e2e tests. E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} -FIREWALL_ETCD="${FIREWALL_SSH:-${NETWORK}-allow-etcd}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 78b1cd69fd2..5eb4046ecc7 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -134,4 +134,3 @@ OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} # Overlay network settings OVERLAY_NETWORK=${OVERLAY_NETWORK:-true} -FIREWALL_ETCD="${FIREWALL_SSH:-${NETWORK}-allow-etcd}" diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 9e45f27fad5..655221c9d16 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -304,7 +304,7 @@ function create-static-ip { echo -e "${color_red}Failed to create static ip $1 ${color_norm}" >&2 exit 2 fi - attempt=$(($attempt+1)) + attempt=$(($attempt+1)) echo -e "${color_yellow}Attempt $attempt failed to create static ip $1. Retrying.${color_norm}" >&2 sleep $(($attempt * 5)) else @@ -603,28 +603,13 @@ function kube-up { --allow "tcp:22" & fi + echo "Starting master and configuring firewalls" gcloud compute firewall-rules create "${MASTER_NAME}-https" \ --project "${PROJECT}" \ --network "${NETWORK}" \ --target-tags "${MASTER_TAG}" \ --allow tcp:443 & - if [[ "${OVERLAY_NETWORK}" == "true" ]]; then - # TODO: Where to put this? Scope it to flannel setup. - if ! "${GCLOUD}" compute firewall-rules --project "${PROJECT}" describe "${FIREWALL_ETCD}" &>/dev/null; then - "${GCLOUD}" compute firewall-rules create "${FIREWALL_ETCD}" \ - --network="${NETWORK}" \ - --project="${PROJECT}" \ - --source-ranges="10.0.0.0/8" \ - --target-tags "${MINION_TAG}" \ - --allow tcp:4001 & - else - echo "... Using etcd firewall-rule: ${FIREWALL_ETCD}" >&2 - fi - else - echo "Not opening etcd up to the cluster: ${OVERLAY_NETWORK} ${FIREWALL_ETCD}" - fi - # We have to make sure the disk is created before creating the master VM, so # run this in the foreground. 
gcloud compute disks create "${MASTER_NAME}-pd" \ @@ -687,7 +672,7 @@ function kube-up { write-node-env local template_name="${NODE_INSTANCE_PREFIX}-template" - + create-node-instance-template $template_name gcloud compute instance-groups managed \ diff --git a/cluster/saltbase/salt/flannel-server/init.sls b/cluster/saltbase/salt/flannel-server/init.sls index 154d943adb8..a5b1d2e66c7 100644 --- a/cluster/saltbase/salt/flannel-server/init.sls +++ b/cluster/saltbase/salt/flannel-server/init.sls @@ -8,7 +8,7 @@ touch /var/log/etcd_flannel.log: /etc/kubernetes/network.json: file.managed: - - source: salt://flannel/network.json + - source: salt://flannel-server/network.json - makedirs: True - user: root - group: root diff --git a/cluster/saltbase/salt/flannel/network.json b/cluster/saltbase/salt/flannel/network.json deleted file mode 100644 index c8d8e0788b8..00000000000 --- a/cluster/saltbase/salt/flannel/network.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "Network": "18.16.0.0/16", - "SubnetLen": 24, - "Backend": { - "Type": "vxlan", - "VNI": 1 - } -} diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls index 5c40a36e44a..45294498427 100644 --- a/cluster/saltbase/salt/top.sls +++ b/cluster/saltbase/salt/top.sls @@ -14,9 +14,6 @@ base: - match: grain - docker - flannel -{% if grains['cloud'] is defined and grains['cloud'] == 'azure' %} - - openvpn-client -{% endif %} - helpers - cadvisor - kube-client-tools diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 2bc3e34cf6c..63bc35546d7 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -159,10 +159,7 @@ type KubeletServer struct { // Pull images one at a time. SerializeImagePulls bool - - // Flannel config parameters - UseDefaultOverlay bool - NetworkConfig string + UseDefaultOverlay bool } // bootstrapping interface for kubelet, targets the initialization protocol @@ -237,7 +234,6 @@ func NewKubeletServer() *KubeletServer { KubeAPIBurst: 10, // Flannel parameters UseDefaultOverlay: useDefaultOverlay, - // NetworkConfig: networkConfig, } } @@ -355,7 +351,6 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { // Flannel config parameters fs.BoolVar(&s.UseDefaultOverlay, "use-default-overlay", s.UseDefaultOverlay, "Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. 
[default=false]") - fs.StringVar(&s.NetworkConfig, "network-config", s.NetworkConfig, "Absolute path to a network json file, as accepted by flannel.") } // UnsecuredKubeletConfig returns a KubeletConfig suitable for being run, or an error if the server setup @@ -494,9 +489,7 @@ func (s *KubeletServer) UnsecuredKubeletConfig() (*KubeletConfig, error) { Writer: writer, VolumePlugins: ProbeVolumePlugins(), - // Flannel options UseDefaultOverlay: s.UseDefaultOverlay, - NetworkConfig: s.NetworkConfig, }, nil } @@ -969,9 +962,7 @@ type KubeletConfig struct { Writer io.Writer VolumePlugins []volume.VolumePlugin - // Flannel parameters UseDefaultOverlay bool - NetworkConfig string } func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { @@ -1056,7 +1047,6 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod kc.ContainerManager, // Flannel parameters kc.UseDefaultOverlay, - //kc.NetworkConfig, ) if err != nil { diff --git a/docs/admin/kubelet.md b/docs/admin/kubelet.md index 44946ad848b..00f32a89985 100644 --- a/docs/admin/kubelet.md +++ b/docs/admin/kubelet.md @@ -137,9 +137,10 @@ kubelet --system-container="": Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: ""). --tls-cert-file="": File containing x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory passed to --cert-dir. --tls-private-key-file="": File containing x509 private key matching --tls-cert-file. + --use-default-overlay[=true]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false] ``` -###### Auto generated by spf13/cobra on 21-Nov-2015 +###### Auto generated by spf13/cobra on 23-Nov-2015 diff --git a/docs/proposals/flannel-integration.md b/docs/proposals/flannel-integration.md index 5f33ec3076d..417cab1d364 100644 --- a/docs/proposals/flannel-integration.md +++ b/docs/proposals/flannel-integration.md @@ -1,35 +1,165 @@ + + + + +WARNING +WARNING +WARNING +WARNING +WARNING + +

PLEASE NOTE: This document applies to the HEAD of the source tree

+ +If you are using a released version of Kubernetes, you should +refer to the docs that go with that version. + + +The latest release of this document can be found +[here](http://releases.k8s.io/release-1.1/docs/proposals/flannel-integration.md). + +Documentation for other releases can be found at +[releases.k8s.io](http://releases.k8s.io). + +-- + + + + + # Flannel integration with Kubernetes ## Why? * Networking works out of the box. -* Cloud gateway configuration is regulated. +* Cloud gateway configuration is regulated by quota. * Consistent bare metal and cloud experience. * Lays foundation for integrating with networking backends and vendors. -# How? +## How? + +Thus: ``` -Master Node1 ----------------------|-------------------------------- -database | - | | -{10.250.0.0/16} | docker - | here's podcidr |restart with podcidr -apiserver <------------------- kubelet - | | |here's podcidr -flannel-server:10253 <------- flannel-daemon - --/16---> - <--watch-- [config iptables] - subscribe to new node subnets - --------> [config VXLan] - | +Master | Node1 +---------------------------------------------------------------------- +{192.168.0.0/16, 256 /24} | docker + | | | restart with podcidr +apiserver <------------------ kubelet (sends podcidr) + | | | here's podcidr, mtu +flannel-server:10253 <------------------ flannel-daemon +Allocates a /24 ------------------> [config iptables, VXLan] + <------------------ [watch subnet leases] +I just allocated ------------------> [config VXLan] +another /24 | ``` -There is a tiny lie in the above diagram, as of now, the flannel server on the master maintains a private etcd. This will not be necessary once we have a generalized network resource, and a Kubernetes x flannel backend. +## Proposal + +Explaining vxlan is out of the scope of this document, however it does take some basic understanding to grok the proposal. Assume some pod wants to communicate across nodes with the above setup. Check the flannel vxlan devices: + +```console +node1 $ ip -d link show flannel.1 +4: flannel.1: mtu 1410 qdisc noqueue state UNKNOWN mode DEFAULT + link/ether a2:53:86:b5:5f:c1 brd ff:ff:ff:ff:ff:ff + vxlan +node1 $ ip -d link show eth0 +2: eth0: mtu 1460 qdisc mq state UP mode DEFAULT qlen 1000 + link/ether 42:01:0a:f0:00:04 brd ff:ff:ff:ff:ff:ff + +node2 $ ip -d link show flannel.1 +4: flannel.1: mtu 1410 qdisc noqueue state UNKNOWN mode DEFAULT + link/ether 56:71:35:66:4a:d8 brd ff:ff:ff:ff:ff:ff + vxlan +node2 $ ip -d link show eth0 +2: eth0: mtu 1460 qdisc mq state UP mode DEFAULT qlen 1000 + link/ether 42:01:0a:f0:00:03 brd ff:ff:ff:ff:ff:ff +``` + +Note that we're ignoring cbr0 for the sake of simplicity. Spin-up a container on each node. 
We're using raw docker for this example only because we want control over where the container lands:
+
+```
+node1 $ docker run -it radial/busyboxplus:curl /bin/sh
+[ root@5ca3c154cde3:/ ]$ ip addr show
+1: lo: mtu 65536 qdisc noqueue
+8: eth0: mtu 1410 qdisc noqueue
+    link/ether 02:42:12:10:20:03 brd ff:ff:ff:ff:ff:ff
+    inet 192.168.32.3/24 scope global eth0
+    valid_lft forever preferred_lft forever
+
+node2 $ docker run -it radial/busyboxplus:curl /bin/sh
+[ root@d8a879a29f5d:/ ]$ ip addr show
+1: lo: mtu 65536 qdisc noqueue
+16: eth0: mtu 1410 qdisc noqueue
+    link/ether 02:42:12:10:0e:07 brd ff:ff:ff:ff:ff:ff
+    inet 192.168.14.7/24 scope global eth0
+    valid_lft forever preferred_lft forever
+[ root@d8a879a29f5d:/ ]$ ping 192.168.32.3
+PING 192.168.32.3 (192.168.32.3): 56 data bytes
+64 bytes from 192.168.32.3: seq=0 ttl=62 time=1.190 ms
+```
+
+__What happened?__:
+
+From 1000 feet:
+* vxlan device driver starts up on node1 and creates a udp tunnel endpoint on 8472
+* container 192.168.32.3 pings 192.168.14.7
+  - what's the MAC of 192.168.14.0?
+  - L2 miss, flannel looks up MAC of subnet
+  - Stores `192.168.14.0 <-> 56:71:35:66:4a:d8` in neighbor table
+  - what's the tunnel endpoint of this MAC?
+  - L3 miss, flannel looks up destination VM ip
+  - Stores `10.240.0.3 <-> 56:71:35:66:4a:d8` in bridge database
+* Sends `[56:71:35:66:4a:d8, 10.240.0.3][vxlan: port, vni][02:42:12:10:20:03, 192.168.14.7][icmp]`
+
+__But will it blend?__
+
+Kubernetes integration is fairly straightforward once we understand the pieces involved, and can be prioritized as follows:
+* Kubelet understands flannel daemon in client mode, flannel server manages independent etcd store on master, node controller backs off cidr allocation
+* Flannel server consults the Kubernetes master for everything network related
+* Flannel daemon works through network plugins in a generic way without bothering the kubelet: needs CNI x Kubernetes standardization
+
+The first is accomplished in this PR, while timelines for 2 and 3 are TBD. To implement the flannel API we can either run a proxy per node and get rid of the flannel server, or service all requests in the flannel server with something like a goroutine per node:
+* `/network/config`: read network configuration and return
+* `/network/leases`:
+  - Post: Return a lease as understood by flannel
+  - Look up node by IP
+  - Store node metadata from the [flannel request](https://github.com/coreos/flannel/blob/master/subnet/subnet.go#L34) in annotations
+  - Return a [Lease object](https://github.com/coreos/flannel/blob/master/subnet/subnet.go#L40) reflecting the node cidr
+  - Get: Handle a watch on leases
+* `/network/leases/subnet`:
+  - Put: This is a request for a lease. If the node controller is allocating CIDRs we can probably just no-op.
+* `/network/reservations`: TBD, we can probably use this to accommodate the node controller allocating CIDRs instead of flannel requesting them
+
+The ick-iest part of this implementation is going to be the `GET /network/leases`, i.e. the watch proxy. We can side-step by waiting for a more generic Kubernetes resource, or implement the proxy as sketched below.
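+As a very rough sketch of the translation involved (the types and the
+annotation key here are hypothetical stand-ins for illustration, not flannel's
+real structs, which live in `subnet/subnet.go`), the heart of such a proxy is
+a node -> lease conversion along these lines:
+
+```go
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+// lease is a simplified stand-in for flannel's Lease/LeaseAttrs.
+type lease struct {
+	Subnet string `json:"subnet"`
+	Attrs  struct {
+		BackendType string `json:"BackendType"`
+		BackendData string `json:"BackendData"` // for vxlan: the node's VTEP MAC
+	} `json:"attrs"`
+}
+
+// nodeToLease builds the lease we would send down a flannel watch for a node,
+// reading the pod CIDR from the node spec and the VTEP MAC from a
+// hypothetical annotation the server stashed away during the POST.
+func nodeToLease(podCIDR string, annotations map[string]string) (lease, error) {
+	var l lease
+	if podCIDR == "" {
+		return l, fmt.Errorf("node has no pod CIDR allocated yet")
+	}
+	vtep, ok := annotations["flannel.alpha.kubernetes.io/vtep"] // hypothetical key
+	if !ok {
+		return l, fmt.Errorf("node is missing the flannel VTEP annotation")
+	}
+	l.Subnet = podCIDR
+	l.Attrs.BackendType = "vxlan"
+	l.Attrs.BackendData = vtep
+	return l, nil
+}
+
+func main() {
+	ann := map[string]string{"flannel.alpha.kubernetes.io/vtep": "56:71:35:66:4a:d8"}
+	l, err := nodeToLease("192.168.14.0/24", ann)
+	if err != nil {
+		panic(err)
+	}
+	b, _ := json.Marshal(l)
+	fmt.Println(string(b)) // the event the proxy would emit down the watch
+}
+```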
+Concretely, we can implement it as follows:
+* Watch all nodes, ignore heartbeats
+* On each change, figure out the lease for the node, construct a [lease watch result](https://github.com/coreos/flannel/blob/0bf263826eab1707be5262703a8092c7d15e0be4/subnet/subnet.go#L72), and send it down the watch with the RV from the node
+* Implement a lease list that does a similar translation
+
+I say this is gross without an API object because for each node->lease translation one has to store and retrieve the node metadata sent by flannel (e.g. the VTEP) from node annotations. [Reference implementation](https://github.com/bprashanth/kubernetes/blob/network_vxlan/pkg/kubelet/flannel_server.go) and [watch proxy](https://github.com/bprashanth/kubernetes/blob/network_vxlan/pkg/kubelet/watch_proxy.go).
 
 # Limitations
 
 * Integration is experimental
+* Flannel etcd not stored in persistent disk
+* CIDR allocation does *not* flow from Kubernetes down to nodes anymore
 
 # Wishlist
+
+This proposal is really just a call for community help in writing a Kubernetes x flannel backend.
+
+* CNI plugin integration
+* Flannel daemon in privileged pod
+* Flannel server talks to apiserver, described in proposal above
+* HTTPS between flannel daemon/server
+* Investigate flannel server running on every node (as done in the reference implementation mentioned above)
+* Use flannel reservation mode to support node controller podcidr allocation
+
+
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/proposals/flannel-integration.md?pixel)]()
+
diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt
index 2bcb625923a..2472f4bc1aa 100644
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@@ -327,3 +327,4 @@ watch-only
 whitelist-override-label
 windows-line-endings
 www-prefix
+use-default-overlay
diff --git a/pkg/kubelet/flannel_server.go b/pkg/kubelet/flannel_helper.go
similarity index 88%
rename from pkg/kubelet/flannel_server.go
rename to pkg/kubelet/flannel_helper.go
index d485a84c7e2..91a16e8c369 100644
--- a/pkg/kubelet/flannel_server.go
+++ b/pkg/kubelet/flannel_helper.go
@@ -1,3 +1,19 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
 package kubelet
 
 import (
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 349103a13f8..8e45b69c159 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -328,11 +328,8 @@ func NewMainKubelet(
 		cpuCFSQuota:       cpuCFSQuota,
 		daemonEndpoints:   daemonEndpoints,
 		containerManager:  containerManager,
-
-		// Flannel options
-		// TODO: This is currently a dummy server.
-		flannelHelper:     NewFlannelHelper(),
-		useDefaultOverlay: useDefaultOverlay,
+		flannelHelper:     NewFlannelHelper(),
+		useDefaultOverlay: useDefaultOverlay,
 	}
 	if klet.kubeClient == nil {
 		glog.Infof("Master not setting up flannel overlay")
@@ -659,7 +656,6 @@ type Kubelet struct {
 	// oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
oneTimeInitializer sync.Once - // Flannel options. useDefaultOverlay bool flannelHelper *FlannelHelper } @@ -1129,7 +1125,6 @@ func (kl *Kubelet) syncNodeStatus() { } if kl.registerNode { // This will exit immediately if it doesn't need to do anything. - glog.Infof("(kubelet) registering node with apiserver") kl.registerWithApiserver() } if err := kl.updateNodeStatus(); err != nil { @@ -2588,10 +2583,10 @@ func (kl *Kubelet) updateRuntimeUp() { func (kl *Kubelet) reconcileCBR0(podCIDR string) error { if podCIDR == "" { - glog.V(1).Info("(kubelet) PodCIDR not set. Will not configure cbr0.") + glog.V(5).Info("PodCIDR not set. Will not configure cbr0.") return nil } - glog.V(1).Infof("(kubelet) PodCIDR is set to %q", podCIDR) + glog.V(5).Infof("PodCIDR is set to %q", podCIDR) _, cidr, err := net.ParseCIDR(podCIDR) if err != nil { return err @@ -2634,13 +2629,12 @@ func (kl *Kubelet) syncNetworkStatus() { var err error if kl.configureCBR0 { if kl.useDefaultOverlay { - glog.Infof("(kubelet) handshaking") podCIDR, err := kl.flannelHelper.Handshake() if err != nil { glog.Infof("Flannel server handshake failed %v", err) return } - glog.Infof("(kubelet) setting cidr, currently: %v -> %v", + glog.Infof("Setting cidr: %v -> %v", kl.runtimeState.podCIDR(), podCIDR) kl.runtimeState.setPodCIDR(podCIDR) } @@ -2915,7 +2909,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { } else if kl.reconcileCIDR { kl.runtimeState.setPodCIDR(node.Spec.PodCIDR) } - glog.Infof("(kubelet) updating node in apiserver with cidr %v", node.Spec.PodCIDR) + glog.Infof("Updating node in apiserver with cidr %v", node.Spec.PodCIDR) if err := kl.setNodeStatus(node); err != nil { return err From 9aa0efa393a60a632ae023973fa8611a6cd15d1e Mon Sep 17 00:00:00 2001 From: Prashanth Balasubramanian Date: Mon, 23 Nov 2015 18:11:51 -0800 Subject: [PATCH 5/5] Turn flannel off by default --- cluster/gce/config-default.sh | 2 +- cluster/gce/config-test.sh | 4 +- .../flannel-server/flannel-server.manifest | 12 ++--- .../saltbase/salt/flannel-server/network.json | 4 +- cluster/saltbase/salt/flannel/init.sls | 2 + .../kube-controller-manager.manifest | 8 +++- cluster/saltbase/salt/kubelet/default | 7 ++- cluster/saltbase/salt/top.sls | 4 ++ .../app/controllermanager.go | 5 ++- cmd/kubelet/app/server.go | 23 ++++------ docs/admin/kubelet.md | 4 +- hack/verify-flags/known-flags.txt | 2 +- pkg/kubelet/flannel_helper.go | 11 +++-- pkg/kubelet/kubelet.go | 44 ++++++++++++++----- 14 files changed, 83 insertions(+), 49 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 8fd2fa8422f..3fb21a0f062 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -113,7 +113,7 @@ ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,ServiceAccount,ResourceQuota KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false} # OpenContrail networking plugin specific settings -NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail +NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail, flannel OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}" OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}" OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 5eb4046ecc7..d0d4153ace3 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -125,12 +125,10 @@ KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false} TEST_CLUSTER="${TEST_CLUSTER:-true}" # OpenContrail networking plugin 
diff --git a/cluster/saltbase/salt/flannel-server/flannel-server.manifest b/cluster/saltbase/salt/flannel-server/flannel-server.manifest
index a0811ec0837..906d1354c13 100644
--- a/cluster/saltbase/salt/flannel-server/flannel-server.manifest
+++ b/cluster/saltbase/salt/flannel-server/flannel-server.manifest
@@ -2,17 +2,17 @@
 "kind": "Pod",
 "apiVersion": "v1",
 "metadata": {
-  "name": "flannel-helper",
+  "name": "flannel-server",
   "namespace": "kube-system",
   "labels": {
-    "app": "flannel-helper",
+    "app": "flannel-server",
     "version": "v0.1"
   }
 },
 "spec": {
 "volumes": [
 {
-  "name": "varlogflannel",
+  "name": "varlog",
   "hostPath": {
     "path": "/var/log"
   }
@@ -30,8 +30,8 @@
 ],
 "containers": [
 {
-  "name": "flannel-helper",
-  "image": "bprashanth/flannel-helper:0.1",
+  "name": "flannel-server-helper",
+  "image": "gcr.io/google_containers/flannel-server-helper:0.1",
   "args": [
     "--network-config=/etc/kubernetes/network.json",
     "--etcd-prefix=/kubernetes.io/network",
@@ -66,7 +66,7 @@
   },
   "volumeMounts": [
     {
-      "name": "varlogflannel",
+      "name": "varlog",
       "mountPath": "/var/log"
     }
   ]
diff --git a/cluster/saltbase/salt/flannel-server/network.json b/cluster/saltbase/salt/flannel-server/network.json
index 2d199ea86e1..b0a6bd4560a 100644
--- a/cluster/saltbase/salt/flannel-server/network.json
+++ b/cluster/saltbase/salt/flannel-server/network.json
@@ -1,6 +1,6 @@
 {
-  "Network": "192.168.0.0/16",
-  "SubnetLen": 26,
+  "Network": "172.16.0.0/12",
+  "SubnetLen": 24,
   "Backend": {
     "Type": "vxlan",
     "VNI": 1
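
For context on the numbers above: moving from 192.168.0.0/16 with /26 leases to 172.16.0.0/12 with /24 leases grows the pool from 1024 node subnets of 62 usable pod IPs each to 2^(24-12) = 4096 node subnets of 254 usable pod IPs each. A minimal Go sketch of how a consumer might read this config; the struct and field names below simply mirror the JSON keys and are illustrative, not flannel's actual types:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	type backend struct {
		Type string // e.g. "vxlan"
		VNI  int
	}

	type networkConfig struct {
		Network   string // cluster-wide pod range, e.g. "172.16.0.0/12"
		SubnetLen uint   // prefix length of each per-node lease, e.g. 24
		Backend   backend
	}

	func main() {
		raw := []byte(`{"Network": "172.16.0.0/12", "SubnetLen": 24, "Backend": {"Type": "vxlan", "VNI": 1}}`)
		var cfg networkConfig
		if err := json.Unmarshal(raw, &cfg); err != nil {
			panic(err)
		}
		// Flannel can lease 2^(SubnetLen - network prefix) node subnets:
		// 2^(24-12) = 4096 with the values in this patch.
		fmt.Printf("%+v\n", cfg)
	}
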
diff --git a/cluster/saltbase/salt/flannel/init.sls b/cluster/saltbase/salt/flannel/init.sls
index 8f36c953d3a..ee746d241ae 100644
--- a/cluster/saltbase/salt/flannel/init.sls
+++ b/cluster/saltbase/salt/flannel/init.sls
@@ -1,3 +1,5 @@
+# TODO: Run flannel daemon in a static pod once we've moved the overlay network
+# setup into a network plugin.
 flannel-tar:
   archive:
     - extracted
diff --git a/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest b/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest
index cc68b0595e3..d3df6419226 100644
--- a/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest
+++ b/cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest
@@ -10,7 +10,11 @@
 {% if pillar['cluster_cidr'] is defined and pillar['cluster_cidr'] != "" -%}
   {% set cluster_cidr = "--cluster-cidr=" + pillar['cluster_cidr'] -%}
 {% endif -%}
-{% if pillar['allocate_node_cidrs'] is defined -%}
+# When we're using flannel, it is responsible for CIDR allocation.
+# This is expected to be a short-term compromise.
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
+  {% set allocate_node_cidrs = "--allocate-node-cidrs=false" -%}
+{% elif pillar['allocate_node_cidrs'] is defined -%}
   {% set allocate_node_cidrs = "--allocate-node-cidrs=" + pillar['allocate_node_cidrs'] -%}
 {% endif -%}
 {% if pillar['terminated_pod_gc_threshold'] is defined -%}
@@ -39,7 +43,7 @@
   {% set root_ca_file = "--root-ca-file=/srv/kubernetes/ca.crt" -%}
 {% endif -%}
 
-{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " --allocate-node-cidrs=false" + " " + terminated_pod_gc + " " + cloud_provider + " " + cloud_config + service_account_key + pillar['log_level'] + " " + root_ca_file -%}
+{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " " + allocate_node_cidrs + " " + terminated_pod_gc + " " + cloud_provider + " " + cloud_config + service_account_key + pillar['log_level'] + " " + root_ca_file -%}
 
 # test_args has to be kept at the end, so they'll overwrite any prior configuration
diff --git a/cluster/saltbase/salt/kubelet/default b/cluster/saltbase/salt/kubelet/default
index 35242a71693..c642e4acc9f 100644
--- a/cluster/saltbase/salt/kubelet/default
+++ b/cluster/saltbase/salt/kubelet/default
@@ -85,6 +85,11 @@
   {% set configure_cbr0 = "--configure-cbr0=" + pillar['allocate_node_cidrs'] -%}
 {% endif -%}
 
+{% set experimental_flannel_overlay = "" -%}
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
+  {% set experimental_flannel_overlay = "--experimental-flannel-overlay=true" %}
+{% endif -%}
+
 # Run containers under the root cgroup and create a system container.
 {% set system_container = "" -%}
 {% set cgroup_root = "" -%}
@@ -117,4 +122,4 @@
 {% endif -%}
 
 # test_args has to be kept at the end, so they'll overwrite any prior configuration
-DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{test_args}}"
+DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{experimental_flannel_overlay}} {{test_args}}"
diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls
index 45294498427..5789f42ed5a 100644
--- a/cluster/saltbase/salt/top.sls
+++ b/cluster/saltbase/salt/top.sls
@@ -13,7 +13,9 @@ base:
     'roles:kubernetes-pool':
       - match: grain
       - docker
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
       - flannel
+{% endif %}
       - helpers
       - cadvisor
       - kube-client-tools
@@ -41,8 +43,10 @@ base:
       - match: grain
       - generate-cert
       - etcd
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
       - flannel-server
       - flannel
+{% endif %}
      - kube-apiserver
      - kube-controller-manager
      - kube-scheduler
diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go
index 6c2724c5e5c..4cad923808a 100644
--- a/cmd/kube-controller-manager/app/controllermanager.go
+++ b/cmd/kube-controller-manager/app/controllermanager.go
@@ -292,8 +292,7 @@ func (s *CMServer) Run(_ []string) error {
 	}
 
 	if s.AllocateNodeCIDRs {
-		// TODO: Pipe this as a command line flag that corresponds to overlay==true
-		if cloud == nil || true {
+		if cloud == nil {
 			glog.Warning("allocate-node-cidrs is set, but no cloud provider specified. Will not manage routes.")
 		} else if routes, ok := cloud.Routes(); !ok {
 			glog.Warning("allocate-node-cidrs is set, but cloud provider does not support routes. Will not manage routes.")
@@ -301,6 +300,8 @@ func (s *CMServer) Run(_ []string) error {
 			routeController := routecontroller.New(routes, kubeClient, s.ClusterName, &s.ClusterCIDR)
 			routeController.Run(s.NodeSyncPeriod)
 		}
+	} else {
+		glog.Infof("allocate-node-cidrs set to %v, node controller not creating routes", s.AllocateNodeCIDRs)
 	}
 
 	resourcequotacontroller.NewResourceQuotaController(kubeClient).Run(s.ResourceQuotaSyncPeriod)
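
The net effect of this hunk, combined with the salt template above: with flannel as the network provider, salt passes --allocate-node-cidrs=false, and the route controller now only starts when CIDR allocation is on and a cloud provider with route support is present (the old "|| true" short-circuit disabled it unconditionally). A condensed sketch of the resulting gating; the function and parameter names are mine, not the controller-manager's:

	package main

	import "fmt"

	// shouldRunRouteController condenses the branch above: cloud routes are
	// only programmed when the controller-manager allocates node CIDRs itself
	// and the cloud provider can manage routes. With flannel,
	// allocateNodeCIDRs arrives as false, so this always returns false.
	func shouldRunRouteController(allocateNodeCIDRs, haveCloud, cloudHasRoutes bool) bool {
		return allocateNodeCIDRs && haveCloud && cloudHasRoutes
	}

	func main() {
		fmt.Println(shouldRunRouteController(false, true, true)) // flannel: false
		fmt.Println(shouldRunRouteController(true, true, true))  // cloud routes: true
	}
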
[default=false]") } // UnsecuredKubeletConfig returns a KubeletConfig suitable for being run, or an error if the server setup @@ -489,7 +485,7 @@ func (s *KubeletServer) UnsecuredKubeletConfig() (*KubeletConfig, error) { Writer: writer, VolumePlugins: ProbeVolumePlugins(), - UseDefaultOverlay: s.UseDefaultOverlay, + ExperimentalFlannelOverlay: s.ExperimentalFlannelOverlay, }, nil } @@ -962,7 +958,7 @@ type KubeletConfig struct { Writer io.Writer VolumePlugins []volume.VolumePlugin - UseDefaultOverlay bool + ExperimentalFlannelOverlay bool } func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { @@ -1045,8 +1041,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod kc.OOMAdjuster, kc.SerializeImagePulls, kc.ContainerManager, - // Flannel parameters - kc.UseDefaultOverlay, + kc.ExperimentalFlannelOverlay, ) if err != nil { diff --git a/docs/admin/kubelet.md b/docs/admin/kubelet.md index 00f32a89985..4597f35020f 100644 --- a/docs/admin/kubelet.md +++ b/docs/admin/kubelet.md @@ -85,6 +85,7 @@ kubelet --enable-server[=true]: Enable the Kubelet's server --event-burst=10: Maximum size of a bursty event records, temporarily allows event records to burst to this number, while still not exceeding event-qps. Only used if --event-qps > 0 --event-qps=5: If > 0, limit event creations per second to this value. If 0, unlimited. + --experimental-flannel-overlay[=false]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false] --file-check-frequency=20s: Duration between checking config files for new data --google-json-key="": The Google Cloud Platform Service Account JSON Key to use for authentication. --healthz-bind-address=127.0.0.1: The IP address for the healthz server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces) @@ -137,10 +138,9 @@ kubelet --system-container="": Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: ""). --tls-cert-file="": File containing x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory passed to --cert-dir. --tls-private-key-file="": File containing x509 private key matching --tls-cert-file. - --use-default-overlay[=true]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false] ``` -###### Auto generated by spf13/cobra on 23-Nov-2015 +###### Auto generated by spf13/cobra on 24-Nov-2015 diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index 2472f4bc1aa..fc5b2d0990a 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -327,4 +327,4 @@ watch-only whitelist-override-label windows-line-endings www-prefix -use-default-overlay +experimental-flannel-overlay diff --git a/pkg/kubelet/flannel_helper.go b/pkg/kubelet/flannel_helper.go index 91a16e8c369..c81cb594fec 100644 --- a/pkg/kubelet/flannel_helper.go +++ b/pkg/kubelet/flannel_helper.go @@ -32,6 +32,9 @@ import ( // TODO: Move all this to a network plugin. 
diff --git a/pkg/kubelet/flannel_helper.go b/pkg/kubelet/flannel_helper.go
index 91a16e8c369..c81cb594fec 100644
--- a/pkg/kubelet/flannel_helper.go
+++ b/pkg/kubelet/flannel_helper.go
@@ -32,6 +32,9 @@ import (
 
 // TODO: Move all this to a network plugin.
 const (
+	// TODO: The location of default docker options is distro-specific, so this
+	// probably won't work on anything other than debian/ubuntu. This is a
+	// short-term compromise until we've moved overlay setup into a plugin.
 	dockerOptsFile    = "/etc/default/docker"
 	flannelSubnetKey  = "FLANNEL_SUBNET"
 	flannelNetworkKey = "FLANNEL_NETWORK"
@@ -78,7 +81,7 @@ func (f *FlannelHelper) Handshake() (podCIDR string, err error) {
 	if _, err = os.Stat(f.subnetFile); err != nil {
 		return "", fmt.Errorf("Waiting for subnet file %v", f.subnetFile)
 	}
-	glog.Infof("(kubelet)Found flannel subnet file %v", f.subnetFile)
+	glog.Infof("Found flannel subnet file %v", f.subnetFile)
 
 	config, err := parseKVConfig(f.subnetFile)
 	if err != nil {
@@ -115,7 +118,7 @@ func writeDockerOptsFromFlannelConfig(flannelConfig map[string]string) error {
 	}
 	opts, ok := dockerOpts[dockerOptsKey]
 	if !ok {
-		glog.Errorf("(kubelet)Did not find docker opts, writing them")
+		glog.Errorf("Did not find docker opts, writing them")
 		opts = fmt.Sprintf(
 			" --bridge=cbr0 --iptables=false --ip-masq=false")
 	} else {
@@ -139,7 +142,7 @@ func parseKVConfig(filename string) (map[string]string, error) {
 		return config, err
 	}
 	str := string(buff)
-	glog.Infof("(kubelet) Read kv options %+v from %v", str, filename)
+	glog.Infof("Read kv options %+v from %v", str, filename)
 	for _, line := range strings.Split(str, "\n") {
 		kv := strings.Split(line, "=")
 		if len(kv) != 2 {
@@ -160,6 +163,6 @@ func writeKVConfig(filename string, kv map[string]string) error {
 	for k, v := range kv {
 		content += fmt.Sprintf("%v=%v\n", k, v)
 	}
-	glog.Warningf("(kubelet)Writing kv options %+v to %v", content, filename)
+	glog.Warningf("Writing kv options %+v to %v", content, filename)
 	return ioutil.WriteFile(filename, []byte(content), 0644)
 }
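
For reference, the handshake above hinges on the flat KEY=VALUE subnet file that flanneld writes. A standalone sketch of the same parsing with illustrative file contents; the real parseKVConfig reads f.subnetFile from disk, and writeDockerOptsFromFlannelConfig then rewrites the docker defaults file from these keys (the --bip derivation shown in the comment is an assumption, not quoted from this patch):

	package main

	import (
		"fmt"
		"strings"
	)

	// parseKV mimics parseKVConfig above: split each line on "=", skip
	// anything that isn't exactly KEY=VALUE (blank or malformed lines).
	func parseKV(contents string) map[string]string {
		config := map[string]string{}
		for _, line := range strings.Split(contents, "\n") {
			kv := strings.Split(line, "=")
			if len(kv) != 2 {
				continue
			}
			config[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
		}
		return config
	}

	func main() {
		// Typical flannel subnet file contents (values are illustrative).
		subnetEnv := "FLANNEL_NETWORK=172.16.0.0/12\nFLANNEL_SUBNET=172.16.29.1/24\nFLANNEL_MTU=1410\n"
		kv := parseKV(subnetEnv)
		// Handshake() hands FLANNEL_SUBNET back as the node's pod CIDR, and
		// the docker bridge options derived from it look roughly like:
		fmt.Printf("--bridge=cbr0 --iptables=false --ip-masq=false --bip=%s\n",
			kv["FLANNEL_SUBNET"])
	}
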
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 8e45b69c159..c8a95a5296c 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -217,7 +217,7 @@ func NewMainKubelet(
 	oomAdjuster *oom.OOMAdjuster,
 	serializeImagePulls bool,
 	containerManager cm.ContainerManager,
-	useDefaultOverlay bool,
+	flannelExperimentalOverlay bool,
 ) (*Kubelet, error) {
 	if rootDirectory == "" {
@@ -328,12 +328,19 @@ func NewMainKubelet(
 		cpuCFSQuota:      cpuCFSQuota,
 		daemonEndpoints:  daemonEndpoints,
 		containerManager: containerManager,
+		flannelExperimentalOverlay: flannelExperimentalOverlay,
 		flannelHelper:    NewFlannelHelper(),
-		useDefaultOverlay: useDefaultOverlay,
+	}
+	if klet.flannelExperimentalOverlay {
+		glog.Infof("Flannel is in charge of podCIDR and overlay networking.")
 	}
 	if klet.kubeClient == nil {
-		glog.Infof("Master not setting up flannel overlay")
-		klet.useDefaultOverlay = false
+		// The master kubelet cannot wait for the flannel daemon, because it is
+		// responsible for starting the flannel server in a static pod. So even
+		// though the flannel daemon runs on the master, it doesn't hold up
+		// cluster bootstrap. All the pods on the master run with host
+		// networking, so the master's flannel is unaffected even if the
+		// network changes; we only need it for the master proxy.
+		klet.flannelExperimentalOverlay = false
 	}
 	if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}); err != nil {
 		return nil, err
@@ -656,8 +663,12 @@ type Kubelet struct {
 	// oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
 	oneTimeInitializer sync.Once
 
-	useDefaultOverlay bool
-	flannelHelper     *FlannelHelper
+	flannelExperimentalOverlay bool
+
+	// TODO: FlannelHelper doesn't store any state, we can instantiate it
+	// on the fly if we're confident the dbus connections it opens don't
+	// put the system under duress.
+	flannelHelper *FlannelHelper
 }
 
 func (kl *Kubelet) allSourcesReady() bool {
@@ -2628,7 +2639,7 @@ var oldNodeUnschedulable bool
 
 func (kl *Kubelet) syncNetworkStatus() {
 	var err error
 	if kl.configureCBR0 {
-		if kl.useDefaultOverlay {
+		if kl.flannelExperimentalOverlay {
 			podCIDR, err := kl.flannelHelper.Handshake()
 			if err != nil {
 				glog.Infof("Flannel server handshake failed %v", err)
 				return
@@ -2903,13 +2914,24 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
 	if node == nil {
 		return fmt.Errorf("no node instance returned for %q", kl.nodeName)
 	}
-	// TODO: Actually update the node spec with pod cidr, this is currently a no-op.
-	if kl.useDefaultOverlay {
-		node.Spec.PodCIDR = kl.runtimeState.podCIDR()
+	// Flannel is the authoritative source of pod CIDR, if it's running.
+	// This is a short-term compromise until we get flannel working in
+	// reservation mode.
+	if kl.flannelExperimentalOverlay {
+		flannelPodCIDR := kl.runtimeState.podCIDR()
+		if node.Spec.PodCIDR != flannelPodCIDR {
+			node.Spec.PodCIDR = flannelPodCIDR
+			glog.Infof("Updating podCIDR to %v", node.Spec.PodCIDR)
+			if updatedNode, err := kl.kubeClient.Nodes().Update(node); err != nil {
+				glog.Warningf("Failed to update podCIDR: %v", err)
+			} else {
+				// Update the node resourceVersion so the status update doesn't fail.
+				node = updatedNode
+			}
+		}
 	} else if kl.reconcileCIDR {
 		kl.runtimeState.setPodCIDR(node.Spec.PodCIDR)
 	}
-	glog.Infof("Updating node in apiserver with cidr %v", node.Spec.PodCIDR)
 	if err := kl.setNodeStatus(node); err != nil {
 		return err