From 8de2fbfa0ab2860e9bfe827bb5de975b71db6a87 Mon Sep 17 00:00:00 2001
From: Prashanth Balasubramanian <beeps@google.com>
Date: Mon, 29 Feb 2016 19:40:15 -0800
Subject: [PATCH] Kube2sky synchronously waits for the Kubernetes service.

---
 cluster/addons/dns/kube2sky/Changelog   |  4 ++
 cluster/addons/dns/kube2sky/Makefile    |  2 +-
 cluster/addons/dns/kube2sky/kube2sky.go | 70 ++++++++++++++++++++++++-
 cluster/addons/dns/skydns-rc.yaml.in    | 64 +++++++++++++---------
 4 files changed, 112 insertions(+), 28 deletions(-)

diff --git a/cluster/addons/dns/kube2sky/Changelog b/cluster/addons/dns/kube2sky/Changelog
index 6aa808a596d..5c819938727 100644
--- a/cluster/addons/dns/kube2sky/Changelog
+++ b/cluster/addons/dns/kube2sky/Changelog
@@ -1,3 +1,7 @@
+## Version 1.13 (Mar 1 2016 Prashanth.B <beeps@google.com>)
+- Synchronously wait for the Kubernetes service at startup.
+- Add a SIGTERM/SIGINT handler.
+
 ## Version 1.12 (Dec 15 2015 Abhishek Shah <abshah@google.com>)
 - Gave pods their own cache store. (034ecbd)
 - Allow pods to have dns. (717660a)
diff --git a/cluster/addons/dns/kube2sky/Makefile b/cluster/addons/dns/kube2sky/Makefile
index 7169d4fea66..8054c1a3f9b 100644
--- a/cluster/addons/dns/kube2sky/Makefile
+++ b/cluster/addons/dns/kube2sky/Makefile
@@ -18,7 +18,7 @@
 
 .PHONY: all kube2sky container push clean test
 
-TAG = 1.12
+TAG = 1.13
 PREFIX = gcr.io/google_containers
 
 all: container
diff --git a/cluster/addons/dns/kube2sky/kube2sky.go b/cluster/addons/dns/kube2sky/kube2sky.go
index d79fc7f7fce..990c1f40b81 100644
--- a/cluster/addons/dns/kube2sky/kube2sky.go
+++ b/cluster/addons/dns/kube2sky/kube2sky.go
@@ -26,8 +26,10 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"os/signal"
 	"strings"
 	"sync"
+	"syscall"
 	"time"
 
 	etcd "github.com/coreos/go-etcd/etcd"
@@ -47,12 +49,16 @@ import (
 	"k8s.io/kubernetes/pkg/util/wait"
 )
 
+// The name of the "master" Kubernetes Service.
+const kubernetesSvcName = "kubernetes"
+
 var (
 	argDomain              = flag.String("domain", "cluster.local", "domain under which to create names")
 	argEtcdMutationTimeout = flag.Duration("etcd-mutation-timeout", 10*time.Second, "crash after retrying etcd mutation for a specified duration")
 	argEtcdServer          = flag.String("etcd-server", "http://127.0.0.1:4001", "URL to etcd server")
 	argKubecfgFile         = flag.String("kubecfg-file", "", "Location of kubecfg file for access to kubernetes master service; --kube-master-url overrides the URL part of this; if neither this nor --kube-master-url are provided, defaults to service account tokens")
 	argKubeMasterURL       = flag.String("kube-master-url", "", "URL to reach kubernetes master. Env variables in this flag will be expanded.")
+	healthzPort            = flag.Int("healthz-port", 8081, "port on which to serve a kube2sky HTTP readiness probe.")
 )
 
 const (
@@ -410,7 +416,11 @@ func (ks *kube2sky) removeService(obj interface{}) {
 }
 
 func (ks *kube2sky) updateService(oldObj, newObj interface{}) {
-	// TODO: Avoid unwanted updates.
+	// TODO: Avoid the window, between the two calls below, in which etcd
+	// has no record for the Service. The removal is needed to completely
+	// clean the Service's directory, whose SRV and A records are hashed
+	// according to oldObj. Unfortunately, removing and re-adding is the
+	// easiest way to purge the directory.
 	ks.removeService(oldObj)
 	ks.newService(newObj)
 }
@@ -562,10 +572,56 @@ func getHash(text string) string {
 	return fmt.Sprintf("%x", h.Sum32())
 }
 
+// waitForKubernetesService blocks until the "kubernetes" master Service can be
+// fetched from the apiserver, sleeping one second between attempts.
+// Since the health probe on the kube2sky container is essentially an nslookup
+// of this service, we cannot serve any DNS records if it doesn't show up.
+// Only after it returns does main start serving this container's readiness endpoint.
+func waitForKubernetesService(client *kclient.Client) (svc *kapi.Service) {
+	const retryDelay = 1 * time.Second
+	fullName := fmt.Sprintf("%v/%v", kapi.NamespaceDefault, kubernetesSvcName)
+	glog.Infof("Waiting for service: %v", fullName)
+	for {
+		found, err := client.Services(kapi.NamespaceDefault).Get(kubernetesSvcName)
+		if err == nil && found != nil {
+			// The apiserver returned the Service, so polling is done.
+			return found
+		}
+		// Reached both on a request error and on a nil Service without error;
+		// either way the lookup is simply retried after a pause.
+		glog.Infof("Ignoring error while waiting for service %v: %v. Sleeping %v before retrying.", fullName, err, retryDelay)
+		time.Sleep(retryDelay)
+	}
+}
+
+// setupSignalHandlers starts a goroutine that waits for SIGINT or SIGTERM,
+// logs the received signal via glog.Fatalf and thereby exits the process.
+func setupSignalHandlers() {
+	// Buffer of 1: signal.Notify does not block on send, so an unbuffered
+	// channel would drop a signal that arrives before the goroutine receives.
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
+	// This program should always exit by logging the SIGINT or SIGTERM it
+	// received. It is not expected to restart unless the kube-dns pod is
+	// deleted, so a restart with nothing logged means something is wrong.
+	// TODO: Remove once #22290 is fixed.
+	go func() {
+		glog.Fatalf("Received signal %s", <-sigChan)
+	}()
+}
+
+// setupHealthzHandlers registers a /readiness endpoint that always replies "ok".
+func setupHealthzHandlers(ks *kube2sky) {
+	ready := func(rw http.ResponseWriter, _ *http.Request) { fmt.Fprint(rw, "ok\n") }
+	// Registered on the default mux, which main serves on --healthz-port.
+	http.HandleFunc("/readiness", ready)
+}
+
 func main() {
 	flag.CommandLine.SetNormalizeFunc(util.WarnWordSepNormalizeFunc)
 	flag.Parse()
 	var err error
+	setupSignalHandlers()
 	// TODO: Validate input flags.
 	domain := *argDomain
 	if !strings.HasSuffix(domain, ".") {
@@ -583,10 +639,20 @@ func main() {
 	if err != nil {
 		glog.Fatalf("Failed to create a kubernetes client: %v", err)
 	}
+	// Wait synchronously for the Kubernetes service and add a DNS record for it.
+	ks.newService(waitForKubernetesService(kubeClient))
+	glog.Infof("Successfully added DNS record for Kubernetes service.")
 
 	ks.endpointsStore = watchEndpoints(kubeClient, &ks)
 	ks.servicesStore = watchForServices(kubeClient, &ks)
 	ks.podsStore = watchPods(kubeClient, &ks)
 
-	select {}
+	// We declare kube2sky ready when:
+	// 1. It has retrieved the Kubernetes master service from the apiserver. If this
+	//    doesn't happen skydns will fail its liveness probe assuming that it can't
+	//    perform any cluster local DNS lookups.
+	// 2. It has set up the 3 watches above.
+	// Once ready this container never flips to not-ready.
+	setupHealthzHandlers(&ks)
+	glog.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", *healthzPort), nil))
 }
diff --git a/cluster/addons/dns/skydns-rc.yaml.in b/cluster/addons/dns/skydns-rc.yaml.in
index 002d8f09193..549dcad62a9 100644
--- a/cluster/addons/dns/skydns-rc.yaml.in
+++ b/cluster/addons/dns/skydns-rc.yaml.in
@@ -1,32 +1,35 @@
 apiVersion: v1
 kind: ReplicationController
 metadata:
-  name: kube-dns-v10
+  name: kube-dns-v11
   namespace: kube-system
   labels:
     k8s-app: kube-dns
-    version: v10
+    version: v11
     kubernetes.io/cluster-service: "true"
 spec:
   replicas: {{ pillar['dns_replicas'] }}
   selector:
     k8s-app: kube-dns
-    version: v10
+    version: v11
   template:
     metadata:
       labels:
         k8s-app: kube-dns
-        version: v10
+        version: v11
         kubernetes.io/cluster-service: "true"
     spec:
       containers:
       - name: etcd
         image: gcr.io/google_containers/etcd-amd64:2.2.1
         resources:
-          # keep request = limit to keep this container in guaranteed class
+          # TODO: Set memory limits when we've profiled the container for large
+          # clusters, then set request = limit to keep this container in
+          # guaranteed class. Currently, this container falls into the
+          # "burstable" category so the kubelet doesn't backoff from restarting it.
           limits:
             cpu: 100m
-            memory: 50Mi
+            memory: 500Mi
           requests:
             cpu: 100m
             memory: 50Mi
@@ -44,25 +47,50 @@ spec:
         - name: etcd-storage
           mountPath: /var/etcd/data
       - name: kube2sky
-        image: gcr.io/google_containers/kube2sky:1.12
+        image: gcr.io/google_containers/kube2sky:1.13
         resources:
-          # keep request = limit to keep this container in guaranteed class
+          # TODO: Set memory limits when we've profiled the container for large
+          # clusters, then set request = limit to keep this container in
+          # guaranteed class. Currently, this container falls into the
+          # "burstable" category so the kubelet doesn't backoff from restarting it.
           limits:
             cpu: 100m
-            memory: 50Mi
+            # Kube2sky watches all pods.
+            memory: 200Mi
           requests:
             cpu: 100m
             memory: 50Mi
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+            scheme: HTTP
+          initialDelaySeconds: 60
+          timeoutSeconds: 5
+          successThreshold: 1
+          failureThreshold: 5
+        readinessProbe:
+          httpGet:
+            path: /readiness
+            port: 8081
+            scheme: HTTP
+          # We poll on pod startup for the Kubernetes master service and
+          # only set up the /readiness HTTP server once that's available.
+          initialDelaySeconds: 30
+          timeoutSeconds: 5
         args:
         # command = "/kube2sky"
         - --domain={{ pillar['dns_domain'] }}
       - name: skydns
         image: gcr.io/google_containers/skydns:2015-10-13-8c72f8c
         resources:
-          # keep request = limit to keep this container in guaranteed class
+          # TODO: Set memory limits when we've profiled the container for large
+          # clusters, then set request = limit to keep this container in
+          # guaranteed class. Currently, this container falls into the
+          # "burstable" category so the kubelet doesn't backoff from restarting it.
           limits:
             cpu: 100m
-            memory: 50Mi
+            memory: 200Mi
           requests:
             cpu: 100m
             memory: 50Mi
@@ -79,20 +107,6 @@ spec:
         - containerPort: 53
           name: dns-tcp
           protocol: TCP
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 8080
-            scheme: HTTP
-          initialDelaySeconds: 30
-          timeoutSeconds: 5
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 8080
-            scheme: HTTP
-          initialDelaySeconds: 1
-          timeoutSeconds: 5
       - name: healthz
         image: gcr.io/google_containers/exechealthz:1.0
         resources: