Merge pull request #17761 from gmarek/flannel

Rebase of @bprashanth's "Flannel out of the box"
Marek Grabowski 2015-11-25 16:04:35 +01:00
commit 62009344cc
19 changed files with 727 additions and 9 deletions

View File

@ -113,7 +113,7 @@ ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,ServiceAccount,ResourceQuota
KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false}
# OpenContrail networking plugin specific settings
-NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail
+NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail, flannel
OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}"
OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}"
OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}"

View File

@ -125,7 +125,7 @@ KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false}
TEST_CLUSTER="${TEST_CLUSTER:-true}"
# OpenContrail networking plugin specific settings
-NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail
+NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail, flannel
OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}"
OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}"
OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}"

View File

@ -0,0 +1,99 @@
{
"kind": "Pod",
"apiVersion": "v1",
"metadata": {
"name": "flannel-server",
"namespace": "kube-system",
"labels": {
"app": "flannel-server",
"version": "v0.1"
}
},
"spec": {
"volumes": [
{
"name": "varlog",
"hostPath": {
"path": "/var/log"
}
},
{
"name": "etcdstorage",
"emptyDir": {}
},
{
"name": "networkconfig",
"hostPath": {
"path": "/etc/kubernetes/network.json"
}
}
],
"containers": [
{
"name": "flannel-server-helper",
"image": "gcr.io/google_containers/flannel-server-helper:0.1",
"args": [
"--network-config=/etc/kubernetes/network.json",
"--etcd-prefix=/kubernetes.io/network",
"--etcd-server=http://127.0.0.1:4001"
],
"volumeMounts": [
{
"name": "networkconfig",
"mountPath": "/etc/kubernetes/network.json"
}
],
"imagePullPolicy": "Always"
},
{
"name": "flannel-container",
"image": "quay.io/coreos/flannel:0.5.5",
"command": [
"/bin/sh",
"-c",
"/opt/bin/flanneld -listen 0.0.0.0:10253 -etcd-endpoints http://127.0.0.1:4001 -etcd-prefix /kubernetes.io/network 1>>/var/log/flannel_server.log 2>&1"
],
"ports": [
{
"hostPort": 10253,
"containerPort": 10253
}
],
"resources": {
"limits": {
"cpu": "100m"
}
},
"volumeMounts": [
{
"name": "varlog",
"mountPath": "/var/log"
}
]
},
{
"name": "etcd-container",
"image": "gcr.io/google_containers/etcd:2.2.1",
"command": [
"/bin/sh",
"-c",
"/opt/bin/etcd --listen-peer-urls http://127.0.0.1:4001 --addr http://127.0.0.1:4001 --bind-addr 127.0.0.1:4001 --data-dir /var/etcd/data 1>>/var/log/etcd_flannel.log 2>&1"
],
"resources": {
"limits": {
"cpu": "100m",
"memory": "50Mi"
}
},
"volumeMounts": [
{
"name": "etcdstorage",
"mountPath": "/var/etcd/data"
}
]
}
],
"hostNetwork": true
}
}

View File

@ -0,0 +1,24 @@
touch /var/log/flannel.log:
cmd.run:
- creates: /var/log/flannel.log
touch /var/log/etcd_flannel.log:
cmd.run:
- creates: /var/log/etcd_flannel.log
/etc/kubernetes/network.json:
file.managed:
- source: salt://flannel-server/network.json
- makedirs: True
- user: root
- group: root
- mode: 755
/etc/kubernetes/manifests/flannel-server.manifest:
file.managed:
- source: salt://flannel-server/flannel-server.manifest
- user: root
- group: root
- mode: 644
- makedirs: true
- dir_mode: 755

View File

@ -0,0 +1,8 @@
{
"Network": "172.16.0.0/12",
"SubnetLen": 24,
"Backend": {
"Type": "vxlan",
"VNI": 1
}
}
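For scale: with `Network` set to a /12 and `SubnetLen` 24, flannel can lease up to 2^(24-12) = 4096 node subnets, each with 254 usable pod IPs. The vxlan encapsulation overhead (50 bytes) is also why the MTU drops from 1460 to 1410 in the proposal below.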

View File

@ -0,0 +1,6 @@
{% if grains.api_servers is defined -%}
{% set daemon_args = "-remote " + grains.api_servers + ":10253" -%}
{% else -%}
{% set daemon_args = "-remote 127.0.0.1:10253" -%}
{% endif -%}
DAEMON_ARGS="{{daemon_args}}"

View File

@ -0,0 +1,44 @@
# TODO: Run flannel daemon in a static pod once we've moved the overlay network
# setup into a network plugin.
flannel-tar:
archive:
- extracted
- user: root
- name: /usr/local/src
- makedirs: True
- source: https://github.com/coreos/flannel/releases/download/v0.5.5/flannel-0.5.5-linux-amd64.tar.gz
- tar_options: v
- source_hash: md5=972c717254775bef528f040af804f2cc
- archive_format: tar
- if_missing: /usr/local/src/flannel/flannel-0.5.5/
flannel-symlink:
file.symlink:
- name: /usr/local/bin/flanneld
- target: /usr/local/src/flannel-0.5.5/flanneld
- force: true
- watch:
- archive: flannel-tar
/etc/default/flannel:
file.managed:
- source: salt://flannel/default
- template: jinja
- user: root
- group: root
- mode: 644
/etc/init.d/flannel:
file.managed:
- source: salt://flannel/initd
- user: root
- group: root
- mode: 755
flannel:
service.running:
- enable: True
- watch:
- file: /usr/local/bin/flanneld
- file: /etc/init.d/flannel
- file: /etc/default/flannel

View File

@ -0,0 +1,126 @@
#!/bin/bash
#
### BEGIN INIT INFO
# Provides: flanneld
# Required-Start: $local_fs $network $syslog
# Required-Stop:
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Flannel daemon
# Description:
# Flannel daemon.
### END INIT INFO
# PATH should only include /usr/* if it runs after the mountnfs.sh script
PATH=/sbin:/usr/sbin:/bin:/usr/bin
DESC="Flannel overlay network daemon"
NAME=flannel
DAEMON=/usr/local/bin/flanneld
DAEMON_ARGS=""
DAEMON_LOG_FILE=/var/log/$NAME.log
PIDFILE=/var/run/$NAME.pid
SCRIPTNAME=/etc/init.d/$NAME
DAEMON_USER=root
# Exit if the package is not installed
[ -x "$DAEMON" ] || exit 0
# Read configuration variable file if it is present
[ -r /etc/default/$NAME ] && . /etc/default/$NAME
# Define LSB log_* functions.
# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
# and status_of_proc is working.
. /lib/lsb/init-functions
#
# Function that starts the daemon/service
#
do_start()
{
# Avoid a potential race at boot time when both monit and init.d start
# the same service
PIDS=$(pidof $DAEMON)
for PID in ${PIDS}; do
kill -9 $PID
done
# Return
# 0 if daemon has been started
# 1 if daemon was already running
# 2 if daemon could not be started
start-stop-daemon --start --quiet --background --no-close \
--make-pidfile --pidfile $PIDFILE \
--exec $DAEMON -c $DAEMON_USER --test > /dev/null \
|| return 1
start-stop-daemon --start --quiet --background --no-close \
--make-pidfile --pidfile $PIDFILE \
--exec $DAEMON -c $DAEMON_USER -- \
$DAEMON_ARGS >> $DAEMON_LOG_FILE 2>&1 \
|| return 2
}
#
# Function that stops the daemon/service
#
do_stop()
{
# Return
# 0 if daemon has been stopped
# 1 if daemon was already stopped
# 2 if daemon could not be stopped
# other if a failure occurred
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME
RETVAL="$?"
[ "$RETVAL" = 2 ] && return 2
# Many daemons don't delete their pidfiles when they exit.
rm -f $PIDFILE
return "$RETVAL"
}
case "$1" in
start)
log_daemon_msg "Starting $DESC" "$NAME"
do_start
case "$?" in
0|1) log_end_msg 0 || exit 0 ;;
2) log_end_msg 1 || exit 1 ;;
esac
;;
stop)
log_daemon_msg "Stopping $DESC" "$NAME"
do_stop
case "$?" in
0|1) log_end_msg 0 ;;
2) exit 1 ;;
esac
;;
status)
status_of_proc -p $PIDFILE "$DAEMON" "$NAME" && exit 0 || exit $?
;;
restart|force-reload)
log_daemon_msg "Restarting $DESC" "$NAME"
do_stop
case "$?" in
0|1)
do_start
case "$?" in
0) log_end_msg 0 ;;
1) log_end_msg 1 ;; # Old process is still running
*) log_end_msg 1 ;; # Failed to start
esac
;;
*)
# Failed to stop
log_end_msg 1
;;
esac
;;
*)
echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
exit 3
;;
esac

View File

@ -10,7 +10,11 @@
{% if pillar['cluster_cidr'] is defined and pillar['cluster_cidr'] != "" -%}
{% set cluster_cidr = "--cluster-cidr=" + pillar['cluster_cidr'] -%}
{% endif -%}
-{% if pillar['allocate_node_cidrs'] is defined -%}
+# When we're using flannel, it is responsible for CIDR allocation.
+# This is expected to be a short-term compromise.
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
+{% set allocate_node_cidrs = "--allocate-node-cidrs=false" -%}
+{% elif pillar['allocate_node_cidrs'] is defined -%}
{% set allocate_node_cidrs = "--allocate-node-cidrs=" + pillar['allocate_node_cidrs'] -%}
{% endif -%}
{% if pillar['terminated_pod_gc_threshold'] is defined -%}

View File

@ -85,6 +85,11 @@
{% set configure_cbr0 = "--configure-cbr0=" + pillar['allocate_node_cidrs'] -%}
{% endif -%}
+{% set experimental_flannel_overlay = "" -%}
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
+{% set experimental_flannel_overlay = "--experimental-flannel-overlay=true" %}
+{% endif -%}
# Run containers under the root cgroup and create a system container.
{% set system_container = "" -%}
{% set cgroup_root = "" -%}
@ -117,4 +122,4 @@
{% endif -%}
# test_args has to be kept at the end, so they'll overwrite any prior configuration
-DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{test_args}}"
+DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{experimental_flannel_overlay}} {{test_args}}"

View File

@ -13,6 +13,9 @@ base:
'roles:kubernetes-pool':
- match: grain
- docker
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
+- flannel
+{% endif %}
- helpers
- cadvisor
- kube-client-tools
@ -40,6 +43,10 @@ base:
- match: grain
- generate-cert
- etcd
+{% if pillar.get('network_provider', '').lower() == 'flannel' %}
+- flannel-server
+- flannel
+{% endif %}
- kube-apiserver
- kube-controller-manager
- kube-scheduler

View File

@ -300,6 +300,8 @@ func (s *CMServer) Run(_ []string) error {
routeController := routecontroller.New(routes, kubeClient, s.ClusterName, &s.ClusterCIDR)
routeController.Run(s.NodeSyncPeriod)
}
+} else {
+glog.Infof("allocate-node-cidrs set to %v, node controller not creating routes", s.AllocateNodeCIDRs)
+}
resourcequotacontroller.NewResourceQuotaController(kubeClient).Run(s.ResourceQuotaSyncPeriod)

View File

@ -67,7 +67,10 @@ import (
"k8s.io/kubernetes/pkg/cloudprovider"
)
-const defaultRootDir = "/var/lib/kubelet"
+const (
+defaultRootDir = "/var/lib/kubelet"
+experimentalFlannelOverlay = false
+)
// KubeletServer encapsulates all of the parameters necessary for starting up
// a kubelet. These can either be set via command line or directly.
@ -154,7 +157,8 @@ type KubeletServer struct {
KubeAPIBurst int
// Pull images one at a time.
-SerializeImagePulls bool
+SerializeImagePulls        bool
+ExperimentalFlannelOverlay bool
}
// bootstrapping interface for kubelet, targets the initialization protocol
@ -227,6 +231,7 @@ func NewKubeletServer() *KubeletServer {
ReconcileCIDR: true,
KubeAPIQPS: 5.0,
KubeAPIBurst: 10,
+ExperimentalFlannelOverlay: experimentalFlannelOverlay,
}
}
@ -341,6 +346,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.Float32Var(&s.KubeAPIQPS, "kube-api-qps", s.KubeAPIQPS, "QPS to use while talking with kubernetes apiserver")
fs.IntVar(&s.KubeAPIBurst, "kube-api-burst", s.KubeAPIBurst, "Burst to use while talking with kubernetes apiserver")
fs.BoolVar(&s.SerializeImagePulls, "serialize-image-pulls", s.SerializeImagePulls, "Pull images one at a time. We recommend *not* changing the default value on nodes that run docker daemon with version < 1.9 or an Aufs storage backend. Issue #10959 has more details. [default=true]")
+fs.BoolVar(&s.ExperimentalFlannelOverlay, "experimental-flannel-overlay", s.ExperimentalFlannelOverlay, "Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false]")
}
// UnsecuredKubeletConfig returns a KubeletConfig suitable for being run, or an error if the server setup
@ -478,6 +484,8 @@ func (s *KubeletServer) UnsecuredKubeletConfig() (*KubeletConfig, error) {
TLSOptions: tlsOptions,
Writer: writer,
VolumePlugins: ProbeVolumePlugins(),
+ExperimentalFlannelOverlay: s.ExperimentalFlannelOverlay,
}, nil
}
@ -949,6 +957,8 @@ type KubeletConfig struct {
TLSOptions *kubelet.TLSOptions
Writer io.Writer
VolumePlugins []volume.VolumePlugin
+ExperimentalFlannelOverlay bool
}
func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) {
@ -1031,6 +1041,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.OOMAdjuster,
kc.SerializeImagePulls,
kc.ContainerManager,
+kc.ExperimentalFlannelOverlay,
)
if err != nil {

View File

@ -85,6 +85,7 @@ kubelet
--enable-server[=true]: Enable the Kubelet's server
--event-burst=10: Maximum size of a bursty event records, temporarily allows event records to burst to this number, while still not exceeding event-qps. Only used if --event-qps > 0
--event-qps=5: If > 0, limit event creations per second to this value. If 0, unlimited.
+--experimental-flannel-overlay[=false]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false]
--file-check-frequency=20s: Duration between checking config files for new data
--google-json-key="": The Google Cloud Platform Service Account JSON Key to use for authentication.
--healthz-bind-address=127.0.0.1: The IP address for the healthz server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces)
@ -139,7 +140,7 @@ kubelet
--tls-private-key-file="": File containing x509 private key matching --tls-cert-file.
```
-###### Auto generated by spf13/cobra on 21-Nov-2015
+###### Auto generated by spf13/cobra on 24-Nov-2015
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@ -0,0 +1,165 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
<strong>
The latest release of this document can be found
[here](http://releases.k8s.io/release-1.1/docs/proposals/flannel-integration.md).
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
# Flannel integration with Kubernetes
## Why?
* Networking works out of the box.
* Cloud gateway configuration is regulated by quota.
* Consistent bare metal and cloud experience.
* Lays foundation for integrating with networking backends and vendors.
## How?
Thus:
```
Master | Node1
----------------------------------------------------------------------
{192.168.0.0/16, 256 /24} | docker
| | | restart with podcidr
apiserver <------------------ kubelet (sends podcidr)
| | | here's podcidr, mtu
flannel-server:10253 <------------------ flannel-daemon
Allocates a /24 ------------------> [config iptables, VXLan]
<------------------ [watch subnet leases]
I just allocated ------------------> [config VXLan]
another /24 |
```
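To make the allocation step in the diagram concrete, here is a minimal sketch of handing out /24s from a cluster CIDR, in the spirit of what the flannel server does. All names are illustrative, the real server persists leases in etcd and reclaims expired ones, and the arithmetic below assumes a /16 cluster CIDR so it stays within one octet:

```go
package main

import (
	"fmt"
	"net"
)

// subnetAllocator hands out consecutive /24 subnets from a cluster CIDR.
// Illustrative only: the real flannel server tracks leases in etcd;
// this just counts upward.
type subnetAllocator struct {
	base net.IP // network address of the cluster CIDR, e.g. 192.168.0.0
	next int    // index of the next unallocated /24
	max  int    // total number of /24s available
}

// newSubnetAllocator assumes a /16 cluster CIDR, matching the
// {192.168.0.0/16, 256 /24} figure above.
func newSubnetAllocator(clusterCIDR string, subnetLen int) (*subnetAllocator, error) {
	_, ipnet, err := net.ParseCIDR(clusterCIDR)
	if err != nil {
		return nil, err
	}
	ones, _ := ipnet.Mask.Size()
	return &subnetAllocator{
		base: ipnet.IP.To4(),
		max:  1 << uint(subnetLen-ones), // 2^(24-16) = 256 subnets in a /16
	}, nil
}

// allocate returns the next free /24, e.g. 192.168.3.0/24.
func (a *subnetAllocator) allocate() (string, error) {
	if a.next >= a.max {
		return "", fmt.Errorf("cluster CIDR exhausted")
	}
	ip := net.IPv4(a.base[0], a.base[1], a.base[2]+byte(a.next), 0)
	a.next++
	return fmt.Sprintf("%v/24", ip), nil
}

func main() {
	alloc, _ := newSubnetAllocator("192.168.0.0/16", 24)
	for i := 0; i < 3; i++ {
		cidr, _ := alloc.allocate()
		fmt.Println(cidr) // 192.168.0.0/24, 192.168.1.0/24, 192.168.2.0/24
	}
}
```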
## Proposal
Explaining vxlan is out of the scope of this document; however, it does take some basic understanding to grok the proposal. Assume some pod wants to communicate across nodes with the above setup. Check the flannel vxlan devices:
```console
node1 $ ip -d link show flannel.1
4: flannel.1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1410 qdisc noqueue state UNKNOWN mode DEFAULT
link/ether a2:53:86:b5:5f:c1 brd ff:ff:ff:ff:ff:ff
vxlan
node1 $ ip -d link show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1460 qdisc mq state UP mode DEFAULT qlen 1000
link/ether 42:01:0a:f0:00:04 brd ff:ff:ff:ff:ff:ff
node2 $ ip -d link show flannel.1
4: flannel.1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1410 qdisc noqueue state UNKNOWN mode DEFAULT
link/ether 56:71:35:66:4a:d8 brd ff:ff:ff:ff:ff:ff
vxlan
node2 $ ip -d link show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1460 qdisc mq state UP mode DEFAULT qlen 1000
link/ether 42:01:0a:f0:00:03 brd ff:ff:ff:ff:ff:ff
```
Note that we're ignoring cbr0 for the sake of simplicity. Spin up a container on each node. We're using raw docker for this example only because we want control over where the container lands:
```console
node1 $ docker run -it radial/busyboxplus:curl /bin/sh
[ root@5ca3c154cde3:/ ]$ ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue
8: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1410 qdisc noqueue
link/ether 02:42:12:10:20:03 brd ff:ff:ff:ff:ff:ff
inet 192.168.32.3/24 scope global eth0
valid_lft forever preferred_lft forever
node2 $ docker run -it radial/busyboxplus:curl /bin/sh
[ root@d8a879a29f5d:/ ]$ ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue
16: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1410 qdisc noqueue
link/ether 02:42:12:10:0e:07 brd ff:ff:ff:ff:ff:ff
inet 192.168.14.7/24 scope global eth0
valid_lft forever preferred_lft forever
[ root@d8a879a29f5d:/ ]$ ping 192.168.32.3
PING 192.168.32.3 (192.168.32.3): 56 data bytes
64 bytes from 192.168.32.3: seq=0 ttl=62 time=1.190 ms
```
__What happened?__
From 1000 feet (a toy model of the lookups follows this list):
* vxlan device driver starts up on node1 and creates a UDP tunnel endpoint on port 8472
* container 192.168.32.3 pings 192.168.14.7
  - what's the MAC of 192.168.14.0?
  - L3 miss, flannel looks up the MAC of the subnet
  - Stores `192.168.14.0 <-> 56:71:35:66:4a:d8` in the neighbor table
  - what's the tunnel endpoint of this MAC?
  - L2 miss, flannel looks up the destination VM IP
  - Stores `10.240.0.3 <-> 56:71:35:66:4a:d8` in the bridge forwarding database
* Sends `[56:71:35:66:4a:d8, 10.240.0.3][vxlan: port, vni][02:42:12:10:20:03, 192.168.14.7][icmp]`
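To pin down the two lookups, here is a toy model of the tables involved. This is purely illustrative Go, not kernel or flannel code; the real state lives in the kernel neighbor table and bridge forwarding database, which flannel populates on miss notifications:

```go
package main

import "fmt"

// Toy model of the two tables flannel populates for the vxlan driver.
type vxlanTables struct {
	neigh map[string]string // pod subnet -> MAC of the remote flannel.1 (filled on L3 miss)
	fdb   map[string]string // flannel.1 MAC -> node (VTEP) IP (filled on L2 miss)
}

// encapsulate resolves the outer headers for a packet headed to podSubnet:
// the destination vxlan MAC and the node IP that terminates the tunnel.
func (t *vxlanTables) encapsulate(podSubnet string) (mac, nodeIP string, err error) {
	mac, ok := t.neigh[podSubnet]
	if !ok {
		return "", "", fmt.Errorf("L3 miss: no MAC for subnet %v", podSubnet)
	}
	nodeIP, ok = t.fdb[mac]
	if !ok {
		return "", "", fmt.Errorf("L2 miss: no VTEP for MAC %v", mac)
	}
	return mac, nodeIP, nil
}

func main() {
	t := &vxlanTables{
		// Values from the example above: node2's flannel.1 MAC and its VM IP.
		neigh: map[string]string{"192.168.14.0": "56:71:35:66:4a:d8"},
		fdb:   map[string]string{"56:71:35:66:4a:d8": "10.240.0.3"},
	}
	mac, nodeIP, _ := t.encapsulate("192.168.14.0")
	fmt.Printf("outer frame: [%v, %v][vxlan: port, vni][inner icmp]\n", mac, nodeIP)
}
```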
__But will it blend?__
Kubernetes integration is fairly straightforward once we understand the pieces involved, and can be prioritized as follows:
* Kubelet understands flannel daemon in client mode, flannel server manages independent etcd store on master, node controller backs off cidr allocation
* Flannel server consults the Kubernetes master for everything network related
* Flannel daemon works through network plugins in a generic way without bothering the kubelet: needs CNI x Kubernetes standardization
The first is accomplished in this PR, while timelines for 2 and 3 are TBD. To implement the flannel API we can either run a proxy per node and get rid of the flannel server, or service all requests in the flannel server with something like a goroutine per node (a minimal server-side sketch follows this list):
* `/network/config`: read network configuration and return it
* `/network/leases`:
  - Post: Return a lease as understood by flannel
    - Lookup node by IP
    - Store node metadata from the [flannel request](https://github.com/coreos/flannel/blob/master/subnet/subnet.go#L34) in annotations
    - Return a [Lease object](https://github.com/coreos/flannel/blob/master/subnet/subnet.go#L40) reflecting the node CIDR
  - Get: Handle a watch on leases
* `/network/leases/subnet`:
  - Put: This is a request for a lease. If the node controller is allocating CIDRs we can probably just no-op.
* `/network/reservations`: TBD; we can probably use this to accommodate the node controller allocating CIDRs instead of flannel requesting them
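Here is the minimal server-side sketch promised above. The paths come from the list; the types, payloads, and stubs are stand-ins, since a real server must speak flannel's exact wire format from subnet.go:

```go
package main

import (
	"encoding/json"
	"log"
	"net/http"
)

// Stand-in for the network config flannel daemons fetch at startup.
type networkConfig struct {
	Network   string `json:"Network"`
	SubnetLen int    `json:"SubnetLen"`
}

func main() {
	cfg := networkConfig{Network: "172.16.0.0/12", SubnetLen: 24}

	// GET /network/config: hand every flannel daemon the cluster network.
	http.HandleFunc("/network/config", func(w http.ResponseWriter, r *http.Request) {
		json.NewEncoder(w).Encode(cfg)
	})

	// POST /network/leases would look up the node by IP and return a lease;
	// GET would serve the lease watch. Both are stubbed here.
	http.HandleFunc("/network/leases", func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "not implemented", http.StatusNotImplemented)
	})

	// The flannel server in this PR listens on 10253 (see the constants below).
	log.Fatal(http.ListenAndServe(":10253", nil))
}
```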
The ickiest part of this implementation is going to be the `GET /network/leases` handler, i.e. the watch proxy. We can side-step this by waiting for a more generic Kubernetes resource. However, we can also implement it as follows (a sketch of the node->lease translation follows this list):
* Watch all nodes, ignore heartbeats
* On each change, figure out the lease for the node, construct a [lease watch result](https://github.com/coreos/flannel/blob/0bf263826eab1707be5262703a8092c7d15e0be4/subnet/subnet.go#L72), and send it down the watch with the resourceVersion from the node
* Implement a lease list that does a similar translation
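And the translation itself, sketched with stand-in types (the field names and the annotation key are illustrative, not flannel's actual wire structs):

```go
package main

import "fmt"

// Stand-in for the node fields the proxy needs.
type node struct {
	PodCIDR         string
	Annotations     map[string]string // flannel metadata, e.g. the VTEP MAC
	ResourceVersion string
}

// Stand-in for flannel's lease watch result.
type leaseEvent struct {
	Subnet string
	Attrs  map[string]string
	Cursor string // watch cursor; we reuse the node's resourceVersion
}

// nodeToLease is the heart of the watch proxy: every node change is
// re-emitted down the flannel watch as a lease event.
func nodeToLease(n node) leaseEvent {
	return leaseEvent{
		Subnet: n.PodCIDR,
		Attrs:  n.Annotations,
		Cursor: n.ResourceVersion,
	}
}

func main() {
	n := node{
		PodCIDR:         "192.168.32.0/24",
		Annotations:     map[string]string{"vtep-mac": "a2:53:86:b5:5f:c1"}, // hypothetical key
		ResourceVersion: "1234",
	}
	fmt.Printf("%+v\n", nodeToLease(n))
}
```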
I say this is gross without an API object because for each node->lease translation one has to store and retrieve the node metadata sent by flannel (e.g. the VTEP) from node annotations. [Reference implementation](https://github.com/bprashanth/kubernetes/blob/network_vxlan/pkg/kubelet/flannel_server.go) and [watch proxy](https://github.com/bprashanth/kubernetes/blob/network_vxlan/pkg/kubelet/watch_proxy.go).
## Limitations
* Integration is experimental
* Flannel's etcd data is not stored on a persistent disk
* CIDR allocation does *not* flow from Kubernetes down to nodes anymore
## Wishlist
This proposal is really just a call for community help in writing a Kubernetes x flannel backend.
* CNI plugin integration
* Flannel daemon in a privileged pod
* Flannel server talks to the apiserver, as described in the proposal above
* HTTPS between flannel daemon/server
* Investigate the flannel server running on every node (as done in the reference implementation mentioned above)
* Use flannel reservation mode to support node controller podCIDR allocation
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/proposals/flannel-integration.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

View File

@ -327,3 +327,4 @@ watch-only
whitelist-override-label
windows-line-endings
www-prefix
+experimental-flannel-overlay

View File

@ -0,0 +1,168 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kubelet
import (
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
utildbus "k8s.io/kubernetes/pkg/util/dbus"
utilexec "k8s.io/kubernetes/pkg/util/exec"
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
"github.com/golang/glog"
)
// TODO: Move all this to a network plugin.
const (
// TODO: The location of default docker options is distro specific, so this
// probably won't work on anything other than debian/ubuntu. This is a
// short-term compromise till we've moved overlay setup into a plugin.
dockerOptsFile = "/etc/default/docker"
flannelSubnetKey = "FLANNEL_SUBNET"
flannelNetworkKey = "FLANNEL_NETWORK"
flannelMtuKey = "FLANNEL_MTU"
dockerOptsKey = "DOCKER_OPTS"
flannelSubnetFile = "/var/run/flannel/subnet.env"
)
// A Kubelet to flannel bridging helper.
type FlannelHelper struct {
subnetFile string
iptablesHelper utiliptables.Interface
}
// NewFlannelHelper creates a new flannel helper.
func NewFlannelHelper() *FlannelHelper {
return &FlannelHelper{
subnetFile: flannelSubnetFile,
iptablesHelper: utiliptables.New(utilexec.New(), utildbus.New(), utiliptables.ProtocolIpv4),
}
}
// Ensure the required MASQUERADE rules exist for the given network/cidr.
func (f *FlannelHelper) ensureFlannelMasqRule(kubeNetwork, podCIDR string) error {
// TODO: Investigate delegation to flannel via -ip-masq=true once flannel
// issue #374 is resolved.
comment := "Flannel masquerade facilitates pod<->node traffic."
args := []string{
"-m", "comment", "--comment", comment,
"!", "-d", kubeNetwork, "-s", podCIDR, "-j", "MASQUERADE",
}
_, err := f.iptablesHelper.EnsureRule(
utiliptables.Append,
utiliptables.TableNAT,
utiliptables.ChainPostrouting,
args...)
return err
}
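// For illustration (values assumed): with kubeNetwork 172.16.0.0/12 and
// podCIDR 172.16.34.0/24, the appended rule is roughly
//   -A POSTROUTING -m comment --comment "..." ! -d 172.16.0.0/12 -s 172.16.34.0/24 -j MASQUERADE
// i.e. traffic leaving the overlay is masqueraded, while pod-to-pod
// traffic inside the cluster network is left untouched.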
// Handshake waits for the flannel subnet file and installs a few IPTables
// rules, returning the pod CIDR allocated for this node.
func (f *FlannelHelper) Handshake() (podCIDR string, err error) {
// TODO: Using a file to communicate is brittle
if _, err = os.Stat(f.subnetFile); err != nil {
return "", fmt.Errorf("Waiting for subnet file %v", f.subnetFile)
}
glog.Infof("Found flannel subnet file %v", f.subnetFile)
config, err := parseKVConfig(f.subnetFile)
if err != nil {
return "", err
}
if err = writeDockerOptsFromFlannelConfig(config); err != nil {
return "", err
}
podCIDR, ok := config[flannelSubnetKey]
if !ok {
return "", fmt.Errorf("No flannel subnet, config %+v", config)
}
kubeNetwork, ok := config[flannelNetworkKey]
if !ok {
return "", fmt.Errorf("No flannel network, config %+v", config)
}
if err = f.ensureFlannelMasqRule(kubeNetwork, podCIDR); err != nil {
return "", fmt.Errorf("Unable to install flannel masquerade rule: %v", err)
}
return podCIDR, nil
}
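// For reference, a typical subnet.env written by flanneld (values are
// illustrative) looks like:
//   FLANNEL_NETWORK=172.16.0.0/12
//   FLANNEL_SUBNET=172.16.34.1/24
//   FLANNEL_MTU=1410
// so Handshake returns 172.16.34.1/24 as the pod CIDR and the helper
// below appends --mtu=1410 to DOCKER_OPTS.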
// Take env variables from the flannel subnet env file and write them to /etc/default/docker.
func writeDockerOptsFromFlannelConfig(flannelConfig map[string]string) error {
// TODO: Write dockeropts to unit file on systemd machines
// https://github.com/docker/docker/issues/9889
mtu, ok := flannelConfig[flannelMtuKey]
if !ok {
return fmt.Errorf("No flannel mtu, flannel config %+v", flannelConfig)
}
dockerOpts, err := parseKVConfig(dockerOptsFile)
if err != nil {
return err
}
opts, ok := dockerOpts[dockerOptsKey]
if !ok {
glog.Errorf("Did not find docker opts, writing them")
opts = fmt.Sprintf(
" --bridge=cbr0 --iptables=false --ip-masq=false")
} else {
opts, _ = strconv.Unquote(opts)
}
dockerOpts[dockerOptsKey] = fmt.Sprintf("\"%v --mtu=%v\"", opts, mtu)
if err = writeKVConfig(dockerOptsFile, dockerOpts); err != nil {
return err
}
return nil
}
// parseKVConfig takes a file with key-value env variables and returns a dictionary mapping the same.
func parseKVConfig(filename string) (map[string]string, error) {
config := map[string]string{}
if _, err := os.Stat(filename); err != nil {
return config, err
}
buff, err := ioutil.ReadFile(filename)
if err != nil {
return config, err
}
str := string(buff)
glog.Infof("Read kv options %+v from %v", str, filename)
for _, line := range strings.Split(str, "\n") {
kv := strings.Split(line, "=")
if len(kv) != 2 {
glog.Warningf("Ignoring non key-value pair %v", kv)
continue
}
config[kv[0]] = kv[1]
}
return config, nil
}
// writeKVConfig writes a kv map as env variables into the given file.
func writeKVConfig(filename string, kv map[string]string) error {
if _, err := os.Stat(filename); err != nil {
return err
}
content := ""
for k, v := range kv {
content += fmt.Sprintf("%v=%v\n", k, v)
}
glog.Warningf("Writing kv options %+v to %v", content, filename)
return ioutil.WriteFile(filename, []byte(content), 0644)
}

View File

@ -217,6 +217,7 @@ func NewMainKubelet(
oomAdjuster *oom.OOMAdjuster,
serializeImagePulls bool,
containerManager cm.ContainerManager,
+flannelExperimentalOverlay bool,
) (*Kubelet, error) {
if rootDirectory == "" {
@ -327,8 +328,20 @@ func NewMainKubelet(
cpuCFSQuota: cpuCFSQuota,
daemonEndpoints: daemonEndpoints,
containerManager: containerManager,
+flannelExperimentalOverlay: flannelExperimentalOverlay,
+flannelHelper: NewFlannelHelper(),
}
+if klet.flannelExperimentalOverlay {
+glog.Infof("Flannel is in charge of podCIDR and overlay networking.")
+}
+if klet.kubeClient == nil {
+// The master kubelet cannot wait for the flannel daemon because it is responsible
+// for starting up the flannel server in a static pod. So even though the flannel
+// daemon runs on the master, it doesn't hold up cluster bootstrap. All the pods
+// on the master run with host networking, so the master flannel doesn't care
+// even if the network changes. We only need it for the master proxy.
+klet.flannelExperimentalOverlay = false
+}
if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}); err != nil {
return nil, err
} else {
@ -649,6 +662,13 @@ type Kubelet struct {
// oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
oneTimeInitializer sync.Once
+flannelExperimentalOverlay bool
+// TODO: FlannelHelper doesn't store any state, we can instantiate it
+// on the fly if we're confident the dbus connections it opens don't
+// put the system under duress.
+flannelHelper *FlannelHelper
}
func (kl *Kubelet) allSourcesReady() bool {
@ -2619,6 +2639,16 @@ var oldNodeUnschedulable bool
func (kl *Kubelet) syncNetworkStatus() {
var err error
if kl.configureCBR0 {
+if kl.flannelExperimentalOverlay {
+podCIDR, err := kl.flannelHelper.Handshake()
+if err != nil {
+glog.Infof("Flannel server handshake failed %v", err)
+return
+}
+glog.Infof("Setting cidr: %v -> %v",
+kl.runtimeState.podCIDR(), podCIDR)
+kl.runtimeState.setPodCIDR(podCIDR)
+}
if err := ensureIPTablesMasqRule(); err != nil {
err = fmt.Errorf("Error on adding ip table rules: %v", err)
glog.Error(err)
@ -2884,7 +2914,22 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
if node == nil {
return fmt.Errorf("no node instance returned for %q", kl.nodeName)
}
-if kl.reconcileCIDR {
+// Flannel is the authoritative source of pod CIDR, if it's running.
+// This is a short-term compromise till we get flannel working in
+// reservation mode.
+if kl.flannelExperimentalOverlay {
+flannelPodCIDR := kl.runtimeState.podCIDR()
+if node.Spec.PodCIDR != flannelPodCIDR {
+node.Spec.PodCIDR = flannelPodCIDR
+glog.Infof("Updating podcidr to %v", node.Spec.PodCIDR)
+if updatedNode, err := kl.kubeClient.Nodes().Update(node); err != nil {
+glog.Warningf("Failed to update podCIDR: %v", err)
+} else {
+// Update the node resourceVersion so the status update doesn't fail.
+node = updatedNode
+}
+}
+} else if kl.reconcileCIDR {
kl.runtimeState.setPodCIDR(node.Spec.PodCIDR)
}

View File

@ -32,6 +32,8 @@ const (
// ControllerManagerPort is the default port for the controller manager status server.
// May be overridden by a flag at startup.
ControllerManagerPort = 10252
+// Port for flannel daemon.
+FlannelDaemonPort = 10253
// KubeletReadOnlyPort exposes basic read-only services from the kubelet.
// May be overridden by a flag at startup.
// This is necessary for heapster to collect monitoring stats from the kubelet