Deferred creation of SkyDNS, monitoring and logging objects

This implements phase 1 of the proposal in #3579, moving the creation
of the pods, RCs, and services to the master after the apiserver is
available.

This is such a wide commit because our existing initial config story
is special:

* Add kube-addons service and associated salt configuration:
** We configure /etc/kubernetes/addons to be a directory of objects
that are appropriately configured for the current cluster.
** "/etc/init.d/kube-addons start" slurps up everything in that dir.
(Most of the difficult is the business logic in salt around getting
that directory built at all.)
** We cheat and overlay cluster/addons into saltbase/salt/kube-addons
as config files for the kube-addons meta-service.
* Change .yaml.in files to salt templates
* Rename {setup,teardown}-{monitoring,logging} to
{setup,teardown}-{monitoring,logging}-firewall to reflect their new
purpose: these functions now ONLY bring up the firewall rules (and
possibly relay the IP to the user).
* Rework GCE {setup,teardown}-{monitoring,logging}-firewall: Both
functions were improperly configuring global rules, yet used
lifecycles tied to the cluster. Use $NODE_INSTANCE_PREFIX with the
rule. The logging rule needed a $NETWORK specifier. The monitoring
rule tried gcloud describe first, but given the instancing, this feels
like a waste of time now.
* Plumb ENABLE_CLUSTER_MONITORING, ENABLE_CLUSTER_LOGGING,
ELASTICSEARCH_LOGGING_REPLICAS and DNS_REPLICAS down to the master,
since these are needed there now.

(Desperately want just a yaml or json file we can share between
providers that has all this crap. Maybe #3525 is an answer?)
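
To make the addons mechanism concrete, here is a rough, hypothetical
sketch of the kind of loop the "start" step performs (the variable names
and the error handling here are made up for illustration; the real script
lives in the salt tree and is more careful):

#!/bin/bash
# Hypothetical sketch only: create every object salt dropped into the
# addons directory once the apiserver is reachable.
ADDON_DIR="/etc/kubernetes/addons"
KUBECTL="/usr/local/bin/kubectl"

for obj in "${ADDON_DIR}"/*; do
  [[ -f "${obj}" ]] || continue          # skip anything that isn't a file
  # Best effort; a real init script would retry until the apiserver is up.
  "${KUBECTL}" create -f "${obj}" || echo "failed to create ${obj}" >&2
done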

Huge caveats: I've done fairly thorough testing on GCE, including
twiddling the env variables and making sure the objects I expect to
come up actually come up. I've tested that it doesn't somehow break
GKE bringup. But I haven't had a chance to test the other providers.

Author: Zach Loafman
Date: 2015-01-18 15:16:52 -08:00
Commit: a305269e18 (parent 3c15427b2d)
21 changed files with 334 additions and 151 deletions


@@ -22,10 +22,14 @@ mkdir -p /srv/salt-overlay/pillar
 cat <<EOF >/srv/salt-overlay/pillar/cluster-params.sls
 node_instance_prefix: '$(echo "$NODE_INSTANCE_PREFIX" | sed -e "s/'/''/g")'
 portal_net: '$(echo "$PORTAL_NET" | sed -e "s/'/''/g")'
+enable_cluster_monitoring: '$(echo "$ENABLE_CLUSTER_MONITORING" | sed -e "s/'/''/g")'
 enable_node_monitoring: '$(echo "$ENABLE_NODE_MONITORING" | sed -e "s/'/''/g")'
+enable_cluster_logging: '$(echo "$ENABLE_CLUSTER_LOGGING" | sed -e "s/'/''/g")'
 enable_node_logging: '$(echo "$ENABLE_NODE_LOGGING" | sed -e "s/'/''/g")'
 logging_destination: '$(echo "$LOGGING_DESTINATION" | sed -e "s/'/''/g")'
+elasticsearch_replicas: '$(echo "$ELASTICSEARCH_LOGGING_REPLICAS" | sed -e "s/'/''/g")'
 enable_cluster_dns: '$(echo "$ENABLE_CLUSTER_DNS" | sed -e "s/'/''/g")'
+dns_replicas: '$(echo "$DNS_REPLICAS" | sed -e "s/'/''/g")'
 dns_server: '$(echo "$DNS_SERVER_IP" | sed -e "s/'/''/g")'
 dns_domain: '$(echo "$DNS_DOMAIN" | sed -e "s/'/''/g")'
 EOF
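
Aside: the sed in each pillar line above is just YAML/SLS single-quote
escaping; inside a single-quoted scalar a literal quote is written as two
quotes. A tiny illustration with a made-up value:

VALUE="o'reilly.example"
escaped=$(echo "$VALUE" | sed -e "s/'/''/g")
echo "dns_domain: '${escaped}'"
# prints: dns_domain: 'o''reilly.example'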


@@ -393,10 +393,14 @@ function kube-up {
 echo "readonly SALT_TAR_URL='${SALT_TAR_URL}'"
 echo "readonly MASTER_HTPASSWD='${htpasswd}'"
 echo "readonly PORTAL_NET='${PORTAL_NET}'"
+echo "readonly ENABLE_CLUSTER_MONITORING='${ENABLE_CLUSTER_MONITORING:-false}'"
 echo "readonly ENABLE_NODE_MONITORING='${ENABLE_NODE_MONITORING:-false}'"
+echo "readonly ENABLE_CLUSTER_LOGGING='${ENABLE_CLUSTER_LOGGING:-false}'"
 echo "readonly ENABLE_NODE_LOGGING='${ENABLE_NODE_LOGGING:-false}'"
 echo "readonly LOGGING_DESTINATION='${LOGGING_DESTINATION:-}'"
+echo "readonly ELASTICSEARCH_LOGGING_REPLICAS='${ELASTICSEARCH_LOGGING_REPLICAS:-}'"
 echo "readonly ENABLE_CLUSTER_DNS='${ENABLE_CLUSTER_DNS:-false}'"
+echo "readonly DNS_REPLICAS='${DNS_REPLICAS:-}'"
 echo "readonly DNS_SERVER_IP='${DNS_SERVER_IP:-}'"
 echo "readonly DNS_DOMAIN='${DNS_DOMAIN:-}'"
 grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/common.sh"
@@ -731,106 +735,70 @@ function restart-kube-proxy {
ssh-to-node "$1" "sudo /etc/init.d/kube-proxy restart"
}
# Setup monitoring using heapster and InfluxDB
function setup-monitoring {
if [[ "${ENABLE_CLUSTER_MONITORING}" == "true" ]]; then
echo "Setting up cluster monitoring using Heapster."
# Setup monitoring firewalls using heapster and InfluxDB
function setup-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
return
fi
detect-project
if ! gcloud compute firewall-rules --project "${PROJECT}" describe monitoring-heapster &> /dev/null; then
if ! gcloud compute firewall-rules create monitoring-heapster \
--project "${PROJECT}" \
--target-tags="${MINION_TAG}" \
--network="${NETWORK}" \
--allow tcp:80 tcp:8083 tcp:8086; then
echo -e "${color_red}Failed to set up firewall for monitoring ${color_norm}" && false
fi
fi
echo "Setting up firewalls to Heapster based cluster monitoring."
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
local grafana_host=""
if "${kubectl}" create -f "${KUBE_ROOT}/cluster/addons/cluster-monitoring/" &> /dev/null; then
# wait for pods to be scheduled on a node.
echo "waiting for monitoring pods to be scheduled."
for i in `seq 1 10`; do
grafana_host=$("${kubectl}" get pods -l name=influxGrafana -o template -t {{range.items}}{{.currentState.hostIP}}:{{end}} | sed s/://g)
if [[ $grafana_host != *"<"* ]]; then
detect-project
gcloud compute firewall-rules create "${INSTANCE_PREFIX}-monitoring-heapster" --project "${PROJECT}" \
--allow tcp:80 tcp:8083 tcp:8086 --target-tags="${MINION_TAG}" --network="${NETWORK}"
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
local grafana_host=""
echo "waiting for monitoring pods to be scheduled."
for i in `seq 1 10`; do
grafana_host=$("${kubectl}" get pods -l name=influxGrafana -o template -t {{range.items}}{{.currentState.hostIP}}:{{end}} | sed s/://g)
if [[ ${grafana_host} != *"<"* ]]; then
break
fi
sleep 10
done
if [[ $grafana_host != *"<"* ]]; then
echo
echo -e "${color_green}Grafana dashboard will be available at ${color_yellow}http://$grafana_host${color_green}. Wait for the monitoring dashboard to be online.${color_norm}"
echo
else
echo -e "${color_red}monitoring pods failed to be scheduled.${color_norm}"
fi
else
echo -e "${color_red}Failed to Setup Monitoring ${color_norm}"
teardown-monitoring
fi
sleep 10
done
if [[ ${grafana_host} != *"<"* ]]; then
echo
echo -e "${color_green}Grafana dashboard will be available at ${color_yellow}http://${grafana_host}${color_green}. Wait for the monitoring dashboard to be online.${color_norm}"
echo
else
echo -e "${color_red}Monitoring pods failed to be scheduled!${color_norm}"
fi
}
function teardown-monitoring {
if [[ "${ENABLE_CLUSTER_MONITORING}" == "true" ]]; then
detect-project
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
local kubecfg="${KUBE_ROOT}/cluster/kubecfg.sh"
"${kubecfg}" resize monitoring-influxGrafanaController 0 &> /dev/null || true
"${kubecfg}" resize monitoring-heapsterController 0 &> /dev/null || true
"${kubectl}" delete -f "${KUBE_ROOT}/cluster/addons/cluster-monitoring/" &> /dev/null || true
if gcloud compute firewall-rules describe --project "${PROJECT}" monitoring-heapster &> /dev/null; then
gcloud compute firewall-rules delete \
--project "${PROJECT}" \
--quiet \
monitoring-heapster &> /dev/null || true
fi
function teardown-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
return
fi
detect-project
gcloud compute firewall-rules delete -q "${INSTANCE_PREFIX}-monitoring-heapster" --project "${PROJECT}" || true
}
function setup-logging {
function setup-logging-firewall {
# If logging with Fluentd to Elasticsearch is enabled then create pods
# and services for Elasticsearch (for ingesting logs) and Kibana (for
# viewing logs).
if [[ "${ENABLE_NODE_LOGGING-}" == "true" ]] && \
[[ "${LOGGING_DESTINATION-}" == "elasticsearch" ]] && \
[[ "${ENABLE_CLUSTER_LOGGING-}" == "true" ]]; then
local -r kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
if sed -e "s/{ELASTICSEARCH_LOGGING_REPLICAS}/${ELASTICSEARCH_LOGGING_REPLICAS}/g" \
"${KUBE_ROOT}"/cluster/addons/fluentd-elasticsearch/es-controller.yaml.in | \
"${kubectl}" create -f - &> /dev/null && \
"${kubectl}" create -f "${KUBE_ROOT}"/cluster/addons/fluentd-elasticsearch/es-service.yaml &> /dev/null && \
"${kubectl}" create -f "${KUBE_ROOT}"/cluster/addons/fluentd-elasticsearch/kibana-controller.yaml &> /dev/null && \
"${kubectl}" create -f "${KUBE_ROOT}"/cluster/addons/fluentd-elasticsearch/kibana-service.yaml &> /dev/null; then
gcloud compute firewall-rules create fluentd-elasticsearch-logging --project "${PROJECT}" \
--allow tcp:5601 tcp:9200 tcp:9300 --target-tags "${INSTANCE_PREFIX}"-minion || true
local -r region="${ZONE::-2}"
local -r es_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" elasticsearch-logging | grep IPAddress | awk '{print $2}')
local -r kibana_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" kibana-logging | grep IPAddress | awk '{print $2}')
echo
echo -e "${color_green}Cluster logs are ingested into Elasticsearch running at ${color_yellow}http://${es_ip}:9200"
echo -e "${color_green}Kibana logging dashboard will be available at ${color_yellow}http://${kibana_ip}:5601${color_norm}"
echo
else
echo -e "${color_red}Failed to launch Elasticsearch and Kibana pods and services for logging.${color_norm}"
fi
if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
[[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
[[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
return
fi
detect-project
gcloud compute firewall-rules create "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" \
--allow tcp:5601 tcp:9200 tcp:9300 --target-tags "${MINION_TAG}" --network="${NETWORK}"
}
function teardown-logging {
if [[ "${ENABLE_NODE_LOGGING-}" == "true" ]] && \
[[ "${LOGGING_DESTINATION-}" == "elasticsearch" ]] && \
[[ "${ENABLE_CLUSTER_LOGGING-}" == "true" ]]; then
local -r kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
"${kubectl}" delete replicationController elasticsearch-logging-controller &> /dev/null || true
"${kubectl}" delete service elasticsearch-logging &> /dev/null || true
"${kubectl}" delete replicationController kibana-logging-controller &> /dev/null || true
"${kubectl}" delete service kibana-logging &> /dev/null || true
gcloud compute firewall-rules delete -q fluentd-elasticsearch-logging --project "${PROJECT}" || true
function teardown-logging-firewall {
if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
[[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
[[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
return
fi
detect-project
gcloud compute firewall-rules delete -q "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" || true
}
# Perform preparations required to run e2e tests
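
One follow-up implied by the rename (and by the untested-providers caveat
above): every provider's cluster scripts presumably need matching
implementations of the new function names, even if they are no-ops. A
hypothetical set of stubs (the function names are from this diff; the
TODO bodies are assumed):

function setup-monitoring-firewall {
  echo "TODO: setup monitoring firewall" 1>&2
}

function teardown-monitoring-firewall {
  echo "TODO: teardown monitoring firewall" 1>&2
}

function setup-logging-firewall {
  echo "TODO: setup logging firewall" 1>&2
}

function teardown-logging-firewall {
  echo "TODO: teardown logging firewall" 1>&2
}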