Fixes to get AWS tests to run

They don't pass (yet), but they at least run!
2026-01-04 23:17:50 +00:00 · 2015-02-11 12:50:47 -05:00
parent 15c57efde2
commit cf470f7da4
5 changed files with 308 additions and 59 deletions
--- a/cluster/aws/util.sh
+++ b/cluster/aws/util.sh
@@ -18,7 +18,8 @@

 # Use the config file specified in $KUBE_CONFIG_FILE, or default to
 # config-default.sh.
-source $(dirname ${BASH_SOURCE})/${KUBE_CONFIG_FILE-"config-default.sh"}
+KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
+source "${KUBE_ROOT}/cluster/aws/${KUBE_CONFIG_FILE-"config-default.sh"}"

 export AWS_DEFAULT_REGION=${ZONE}
 AWS_CMD="aws --output json ec2"
@@ -30,7 +31,7 @@ function json_val {
 # TODO (ayurchuk) Refactor the get_* functions to use filters
 # TODO (bburns) Parameterize this for multiple cluster per project
 function get_instance_ids {
-  python -c 'import json,sys; lst = [str(instance["InstanceId"]) for reservation in json.load(sys.stdin)["Reservations"] for instance in reservation["Instances"] for tag in instance.get("Tags", []) if tag["Value"].startswith("kubernetes-minion") or tag["Value"].startswith("kubernetes-master")]; print " ".join(lst)'
+  python -c "import json,sys; lst = [str(instance['InstanceId']) for reservation in json.load(sys.stdin)['Reservations'] for instance in reservation['Instances'] for tag in instance.get('Tags', []) if tag['Value'].startswith('${MASTER_TAG}') or tag['Value'].startswith('${MINION_TAG}')]; print ' '.join(lst)"
 }

 function get_vpc_id {
@@ -106,20 +107,6 @@ function ensure-temp-dir {
  fi
 }

-function setup-monitoring-firewall {
-  if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
-    # TODO: Implement this.
-    echo "Monitoring not currently supported on AWS"
-  fi
-}
-
-function teardown-monitoring-firewall {
-  if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
-    # TODO: Implement this.
-    echo "Monitoring not currently supported on AWS"
-  fi
-}
-
 # Verify and find the various tar files that we are going to use on the server.
 #
 # Vars set:
@@ -342,10 +329,56 @@ function kube-up {
  add-tag $master_id Name $MASTER_NAME
  add-tag $master_id Role $MASTER_TAG

-  echo "Waiting 1 minute for master to be ready"
-  # TODO(justinsb): Actually poll for the master being ready
-  #  (we at least need the salt-master to be up before the minions come up)
-  sleep 60
+  echo "Waiting for master to be ready"
+
+  local attempt=0
+
+   while true; do
+    echo -n Attempt "$(($attempt+1))" to check for master node
+    local ip=$($AWS_CMD describe-instances | get_instance_public_ip $MASTER_NAME)
+    if [[ -z "${ip}" ]]; then
+      if (( attempt > 30 )); then
+        echo
+        echo -e "${color_red}master failed to start. Your cluster is unlikely" >&2
+        echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
+        echo -e "cluster. (sorry!)${color_norm}" >&2
+        exit 1
+      fi
+    else
+      KUBE_MASTER=${MASTER_NAME}
+      KUBE_MASTER_IP=${ip}
+
+      echo -e " ${color_green}[master running @${KUBE_MASTER_IP}]${color_norm}"
+      break
+    fi
+    echo -e " ${color_yellow}[master not working yet]${color_norm}"
+    attempt=$(($attempt+1))
+    sleep 10
+  done
+
+  # We need the salt-master to be up for the minions to work
+  attempt=0
+  while true; do
+    echo -n Attempt "$(($attempt+1))" to check for salt-master
+    local output
+    output=$(ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} pgrep salt-master 2> $LOG) || output=""
+    if [[ -z "${output}" ]]; then
+      if (( attempt > 30 )); then
+        echo
+        echo -e "${color_red}salt-master failed to start on ${KUBE_MASTER_IP}. Your cluster is unlikely" >&2
+        echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
+        echo -e "cluster. (sorry!)${color_norm}" >&2
+        exit 1
+      fi
+    else
+      echo -e " ${color_green}[salt-master running]${color_norm}"
+      break
+    fi
+    echo -e " ${color_yellow}[salt-master not working yet]${color_norm}"
+    attempt=$(($attempt+1))
+    sleep 10
+  done
+

  for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
    echo "Starting Minion (${MINION_NAMES[$i]})"
@@ -413,7 +446,7 @@ function kube-up {
    sleep 10
  done
  echo "Re-running salt highstate"
-  ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo salt '*' state.highstate > $LOG
+  ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo salt '*' state.highstate > $LOG

  echo "Waiting for cluster initialization."
  echo
@@ -439,7 +472,7 @@ function kube-up {
  # Basic sanity checking
  for i in ${KUBE_MINION_IP_ADDRESSES[@]}; do
    # Make sure docker is installed
-    ssh -oStrictHostKeyChecking=no ubuntu@$i -i ~/.ssh/kube_aws_rsa which docker > $LOG 2>&1
+    ssh -oStrictHostKeyChecking=no ubuntu@$i -i ${AWS_SSH_KEY} which docker > $LOG 2>&1
    if [ "$?" != "0" ]; then
      echo "Docker failed to install on $i. Your cluster is unlikely to work correctly."
      echo "Please run ./cluster/aws/kube-down.sh and re-create the cluster. (sorry!)"
@@ -461,9 +494,9 @@ function kube-up {
  # config file.  Distribute the same way the htpasswd is done.
  (
    umask 077
-    ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.crt >"${HOME}/${kube_cert}" 2>$LOG
-    ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.key >"${HOME}/${kube_key}" 2>$LOG
-    ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/ca.crt >"${HOME}/${ca_cert}" 2>$LOG
+    ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.crt >"${HOME}/${kube_cert}" 2>$LOG
+    ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.key >"${HOME}/${kube_key}" 2>$LOG
+    ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/ca.crt >"${HOME}/${ca_cert}" 2>$LOG

    cat << EOF > ~/.kubernetes_auth
 {
@@ -482,33 +515,41 @@ EOF

 function kube-down {
  instance_ids=$($AWS_CMD describe-instances | get_instance_ids)
-  $AWS_CMD terminate-instances --instance-ids $instance_ids > $LOG
-  echo "Waiting for instances deleted"
-  while true; do
-    instance_states=$($AWS_CMD describe-instances --instance-ids $instance_ids | expect_instance_states terminated)
-    if [[ "$instance_states" == "" ]]; then
-      echo "All instances terminated"
-      break
-    else
-      echo "Instances not yet terminated: $instance_states"
-      echo "Sleeping for 3 seconds..."
-      sleep 3
-    fi
-  done
+  if [[ -n ${instance_ids} ]]; then
+    $AWS_CMD terminate-instances --instance-ids $instance_ids > $LOG
+    echo "Waiting for instances deleted"
+    while true; do
+      instance_states=$($AWS_CMD describe-instances --instance-ids $instance_ids | expect_instance_states terminated)
+      if [[ "$instance_states" == "" ]]; then
+        echo "All instances terminated"
+        break
+      else
+        echo "Instances not yet terminated: $instance_states"
+        echo "Sleeping for 3 seconds..."
+        sleep 3
+      fi
+    done
+  fi

  echo "Deleting VPC"
-  vpc_id=$($AWS_CMD describe-vpcs | get_vpc_id)
-  subnet_id=$($AWS_CMD describe-subnets | get_subnet_id $vpc_id)
-  igw_id=$($AWS_CMD describe-internet-gateways | get_igw_id $vpc_id)
-  route_table_id=$($AWS_CMD describe-route-tables | get_route_table_id $vpc_id)
  sec_group_id=$($AWS_CMD describe-security-groups | get_sec_group_id)
+  if [[ -n "${sec_group_id}" ]]; then
+    $AWS_CMD delete-security-group --group-id $sec_group_id > $LOG
+  fi

-  $AWS_CMD delete-subnet --subnet-id $subnet_id > $LOG
-  $AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
-  $AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
-  $AWS_CMD delete-security-group --group-id $sec_group_id > $LOG
-  $AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
-  $AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
+  vpc_id=$($AWS_CMD describe-vpcs | get_vpc_id)
+  if [[ -n "${vpc_id}" ]]; then
+    subnet_id=$($AWS_CMD describe-subnets | get_subnet_id $vpc_id)
+    igw_id=$($AWS_CMD describe-internet-gateways | get_igw_id $vpc_id)
+    route_table_id=$($AWS_CMD describe-route-tables | get_route_table_id $vpc_id)
+
+    $AWS_CMD delete-subnet --subnet-id $subnet_id > $LOG
+    $AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
+    $AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
+    $AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
+
+    $AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
+  fi
 }

 function setup-logging-firewall {
@@ -518,3 +559,127 @@ function setup-logging-firewall {
 function teardown-logging-firewall {
  echo "TODO: teardown logging"
 }
+
+# -----------------------------------------------------------------------------
+# Cluster specific test helpers used from hack/e2e-test.sh
+
+# Execute prior to running tests to build a release if required for env.
+#
+# Assumed Vars:
+#   KUBE_ROOT
+function test-build-release {
+  # Make a release
+  "${KUBE_ROOT}/build/release.sh"
+}
+
+# Execute prior to running tests to initialize required structure. This is
+# called from hack/e2e.go only when running -up (it is run after kube-up).
+#
+# Assumed vars:
+#   Variables from config.sh
+function test-setup {
+  echo "test-setup complete"
+}
+
+# Execute after running tests to perform any required clean-up. This is called
+# from hack/e2e.go
+function test-teardown {
+#  detect-project
+#  echo "Shutting down test cluster in background."
+#  gcloud compute firewall-rules delete  \
+#    --project "${PROJECT}" \
+#    --quiet \
+#    "${MINION_TAG}-${INSTANCE_PREFIX}-http-alt" || true
+  echo "Shutting down test cluster."
+  "${KUBE_ROOT}/cluster/kube-down.sh"
+}
+
+# SSH to a node by name ($1) and run a command ($2). TODO: still calls gcloud (GCE); needs an AWS port.
+function ssh-to-node {
+  local node="$1"
+  local cmd="$2"
+  for try in $(seq 1 5); do
+    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"; then
+      break
+    fi
+  done
+}
+
+# Restart the kube-proxy on a node ($1)
+function restart-kube-proxy {
+  ssh-to-node "$1" "sudo /etc/init.d/kube-proxy restart"
+}
+
+# Set up monitoring firewalls for heapster and InfluxDB (not yet supported on AWS)
+function setup-monitoring-firewall {
+  if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support monitoring firewall
+  echo "Cluster monitoring setup is not (yet) supported on AWS"
+}
+
+function teardown-monitoring-firewall {
+  if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support monitoring firewall
+}
+
+function setup-logging-firewall {
+  # If logging with Fluentd to Elasticsearch is enabled then create pods
+  # and services for Elasticsearch (for ingesting logs) and Kibana (for
+  # viewing logs).
+  if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
+     [[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
+     [[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support logging
+  echo "Logging setup is not (yet) supported on AWS"
+
+#  detect-project
+#  gcloud compute firewall-rules create "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" \
+#    --allow tcp:5601 tcp:9200 tcp:9300 --target-tags "${MINION_TAG}" --network="${NETWORK}"
+#
+#  # This should be nearly instant once kube-addons gets a chance to
+#  # run, and we already know we can hit the apiserver, but it's still
+#  # worth checking.
+#  echo "waiting for logging services to be created by the master."
+#  local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
+#  for i in `seq 1 10`; do
+#    if "${kubectl}" get services -l name=kibana-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q kibana-logging &&
+#      "${kubectl}" get services -l name=elasticsearch-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q elasticsearch-logging; then
+#      break
+#    fi
+#    sleep 10
+#  done
+#
+#  local -r region="${ZONE::-2}"
+#  local -r es_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" elasticsearch-logging | grep IPAddress | awk '{print $2}')
+#  local -r kibana_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" kibana-logging | grep IPAddress | awk '{print $2}')
+#  echo
+#  echo -e "${color_green}Cluster logs are ingested into Elasticsearch running at ${color_yellow}http://${es_ip}:9200"
+#  echo -e "${color_green}Kibana logging dashboard will be available at ${color_yellow}http://${kibana_ip}:5601${color_norm}"
+#  echo
+}
+
+function teardown-logging-firewall {
+  if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
+     [[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
+     [[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support logging
+}
+
+# Perform preparations required to run e2e tests
+function prepare-e2e() {
+  # (GCE runs detect-project here; I don't think we need to do anything on AWS)
+  # Note: we can't print anything here, or else the test tools will break with the extra output
+  return
+}