Fixes to get AWS tests to run

They don't pass (yet), but they at least run!
This commit is contained in:
Justin Santa Barbara
2015-02-11 12:50:47 -05:00
parent 15c57efde2
commit cf470f7da4
5 changed files with 308 additions and 59 deletions

View File

@@ -18,7 +18,8 @@
# Use the config file specified in $KUBE_CONFIG_FILE, or default to
# config-default.sh.
source $(dirname ${BASH_SOURCE})/${KUBE_CONFIG_FILE-"config-default.sh"}
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
source "${KUBE_ROOT}/cluster/aws/${KUBE_CONFIG_FILE-"config-default.sh"}"
export AWS_DEFAULT_REGION=${ZONE}
AWS_CMD="aws --output json ec2"
@@ -30,7 +31,7 @@ function json_val {
# TODO (ayurchuk) Refactor the get_* functions to use filters
# TODO (bburns) Parameterize this for multiple cluster per project
function get_instance_ids {
python -c 'import json,sys; lst = [str(instance["InstanceId"]) for reservation in json.load(sys.stdin)["Reservations"] for instance in reservation["Instances"] for tag in instance.get("Tags", []) if tag["Value"].startswith("kubernetes-minion") or tag["Value"].startswith("kubernetes-master")]; print " ".join(lst)'
python -c "import json,sys; lst = [str(instance['InstanceId']) for reservation in json.load(sys.stdin)['Reservations'] for instance in reservation['Instances'] for tag in instance.get('Tags', []) if tag['Value'].startswith('${MASTER_TAG}') or tag['Value'].startswith('${MINION_TAG}')]; print ' '.join(lst)"
}
function get_vpc_id {
@@ -106,20 +107,6 @@ function ensure-temp-dir {
fi
}
function setup-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
# TODO: Implement this.
echo "Monitoring not currently supported on AWS"
fi
}
function teardown-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
# TODO: Implement this.
echo "Monitoring not currently supported on AWS"
fi
}
# Verify and find the various tar files that we are going to use on the server.
#
# Vars set:
@@ -342,10 +329,56 @@ function kube-up {
add-tag $master_id Name $MASTER_NAME
add-tag $master_id Role $MASTER_TAG
echo "Waiting 1 minute for master to be ready"
# TODO(justinsb): Actually poll for the master being ready
# (we at least need the salt-master to be up before the minions come up)
sleep 60
echo "Waiting for master to be ready"
local attempt=0
while true; do
echo -n Attempt "$(($attempt+1))" to check for master node
local ip=$($AWS_CMD describe-instances | get_instance_public_ip $MASTER_NAME)
if [[ -z "${ip}" ]]; then
if (( attempt > 30 )); then
echo
echo -e "${color_red}master failed to start. Your cluster is unlikely" >&2
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
else
KUBE_MASTER=${MASTER_NAME}
KUBE_MASTER_IP=${ip}
echo -e " ${color_green}[master running @${KUBE_MASTER_IP}]${color_norm}"
break
fi
echo -e " ${color_yellow}[master not working yet]${color_norm}"
attempt=$(($attempt+1))
sleep 10
done
# We need the salt-master to be up for the minions to work
attempt=0
while true; do
echo -n Attempt "$(($attempt+1))" to check for salt-master
local output
output=$(ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} pgrep salt-master 2> $LOG) || output=""
if [[ -z "${output}" ]]; then
if (( attempt > 30 )); then
echo
echo -e "${color_red}salt-master failed to start on ${KUBE_MASTER_IP}. Your cluster is unlikely" >&2
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
else
echo -e " ${color_green}[salt-master running]${color_norm}"
break
fi
echo -e " ${color_yellow}[salt-master not working yet]${color_norm}"
attempt=$(($attempt+1))
sleep 10
done
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
echo "Starting Minion (${MINION_NAMES[$i]})"
@@ -413,7 +446,7 @@ function kube-up {
sleep 10
done
echo "Re-running salt highstate"
ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo salt '*' state.highstate > $LOG
ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo salt '*' state.highstate > $LOG
echo "Waiting for cluster initialization."
echo
@@ -439,7 +472,7 @@ function kube-up {
# Basic sanity checking
for i in ${KUBE_MINION_IP_ADDRESSES[@]}; do
# Make sure docker is installed
ssh -oStrictHostKeyChecking=no ubuntu@$i -i ~/.ssh/kube_aws_rsa which docker > $LOG 2>&1
ssh -oStrictHostKeyChecking=no ubuntu@$i -i ${AWS_SSH_KEY} which docker > $LOG 2>&1
if [ "$?" != "0" ]; then
echo "Docker failed to install on $i. Your cluster is unlikely to work correctly."
echo "Please run ./cluster/aws/kube-down.sh and re-create the cluster. (sorry!)"
@@ -461,9 +494,9 @@ function kube-up {
# config file. Distribute the same way the htpasswd is done.
(
umask 077
ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.crt >"${HOME}/${kube_cert}" 2>$LOG
ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.key >"${HOME}/${kube_key}" 2>$LOG
ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/ca.crt >"${HOME}/${ca_cert}" 2>$LOG
ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.crt >"${HOME}/${kube_cert}" 2>$LOG
ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.key >"${HOME}/${kube_key}" 2>$LOG
ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/ca.crt >"${HOME}/${ca_cert}" 2>$LOG
cat << EOF > ~/.kubernetes_auth
{
@@ -482,33 +515,41 @@ EOF
function kube-down {
instance_ids=$($AWS_CMD describe-instances | get_instance_ids)
$AWS_CMD terminate-instances --instance-ids $instance_ids > $LOG
echo "Waiting for instances deleted"
while true; do
instance_states=$($AWS_CMD describe-instances --instance-ids $instance_ids | expect_instance_states terminated)
if [[ "$instance_states" == "" ]]; then
echo "All instances terminated"
break
else
echo "Instances not yet terminated: $instance_states"
echo "Sleeping for 3 seconds..."
sleep 3
fi
done
if [[ -n ${instance_ids} ]]; then
$AWS_CMD terminate-instances --instance-ids $instance_ids > $LOG
echo "Waiting for instances deleted"
while true; do
instance_states=$($AWS_CMD describe-instances --instance-ids $instance_ids | expect_instance_states terminated)
if [[ "$instance_states" == "" ]]; then
echo "All instances terminated"
break
else
echo "Instances not yet terminated: $instance_states"
echo "Sleeping for 3 seconds..."
sleep 3
fi
done
fi
echo "Deleting VPC"
vpc_id=$($AWS_CMD describe-vpcs | get_vpc_id)
subnet_id=$($AWS_CMD describe-subnets | get_subnet_id $vpc_id)
igw_id=$($AWS_CMD describe-internet-gateways | get_igw_id $vpc_id)
route_table_id=$($AWS_CMD describe-route-tables | get_route_table_id $vpc_id)
sec_group_id=$($AWS_CMD describe-security-groups | get_sec_group_id)
if [[ -n "${sec_group_id}" ]]; then
$AWS_CMD delete-security-group --group-id $sec_group_id > $LOG
fi
$AWS_CMD delete-subnet --subnet-id $subnet_id > $LOG
$AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
$AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
$AWS_CMD delete-security-group --group-id $sec_group_id > $LOG
$AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
$AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
vpc_id=$($AWS_CMD describe-vpcs | get_vpc_id)
if [[ -n "${vpc_id}" ]]; then
subnet_id=$($AWS_CMD describe-subnets | get_subnet_id $vpc_id)
igw_id=$($AWS_CMD describe-internet-gateways | get_igw_id $vpc_id)
route_table_id=$($AWS_CMD describe-route-tables | get_route_table_id $vpc_id)
$AWS_CMD delete-subnet --subnet-id $subnet_id > $LOG
$AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
$AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
$AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
$AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
fi
}
function setup-logging-firewall {
@@ -518,3 +559,127 @@ function setup-logging-firewall {
function teardown-logging-firewall {
echo "TODO: teardown logging"
}
# -----------------------------------------------------------------------------
# Cluster specific test helpers used from hack/e2e-test.sh
# Execute prior to running tests to build a release if required for env.
#
# Assumed Vars:
# KUBE_ROOT
function test-build-release {
# Make a release
"${KUBE_ROOT}/build/release.sh"
}
# Execute prior to running tests to initialize required structure. This is
# called from hack/e2e.go only when running -up (it is run after kube-up).
#
# Assumed vars:
# Variables from config.sh
function test-setup {
echo "test-setup complete"
}
# Execute after running tests to perform any required clean-up. This is called
# from hack/e2e.go
function test-teardown {
# detect-project
# echo "Shutting down test cluster in background."
# gcloud compute firewall-rules delete \
# --project "${PROJECT}" \
# --quiet \
# "${MINION_TAG}-${INSTANCE_PREFIX}-http-alt" || true
echo "Shutting down test cluster."
"${KUBE_ROOT}/cluster/kube-down.sh"
}
# SSH to a node by name ($1) and run a command ($2).
function ssh-to-node {
local node="$1"
local cmd="$2"
for try in $(seq 1 5); do
if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"; then
break
fi
done
}
# Restart the kube-proxy on a node ($1)
function restart-kube-proxy {
ssh-to-node "$1" "sudo /etc/init.d/kube-proxy restart"
}
# Setup monitoring firewalls using heapster and InfluxDB
function setup-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
return
fi
# TODO: Support monitoring firewall
echo "Cluster monitoring setup is not (yet) supported on AWS"
}
function teardown-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
return
fi
# TODO: Support monitoring firewall
}
function setup-logging-firewall {
# If logging with Fluentd to Elasticsearch is enabled then create pods
# and services for Elasticsearch (for ingesting logs) and Kibana (for
# viewing logs).
if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
[[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
[[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
return
fi
# TODO: Support logging
echo "Logging setup is not (yet) supported on AWS"
# detect-project
# gcloud compute firewall-rules create "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" \
# --allow tcp:5601 tcp:9200 tcp:9300 --target-tags "${MINION_TAG}" --network="${NETWORK}"
#
# # This should be nearly instant once kube-addons gets a chance to
# # run, and we already know we can hit the apiserver, but it's still
# # worth checking.
# echo "waiting for logging services to be created by the master."
# local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
# for i in `seq 1 10`; do
# if "${kubectl}" get services -l name=kibana-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q kibana-logging &&
# "${kubectl}" get services -l name=elasticsearch-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q elasticsearch-logging; then
# break
# fi
# sleep 10
# done
#
# local -r region="${ZONE::-2}"
# local -r es_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" elasticsearch-logging | grep IPAddress | awk '{print $2}')
# local -r kibana_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" kibana-logging | grep IPAddress | awk '{print $2}')
# echo
# echo -e "${color_green}Cluster logs are ingested into Elasticsearch running at ${color_yellow}http://${es_ip}:9200"
# echo -e "${color_green}Kibana logging dashboard will be available at ${color_yellow}http://${kibana_ip}:5601${color_norm}"
# echo
}
function teardown-logging-firewall {
if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
[[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
[[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
return
fi
# TODO: Support logging
}
# Perform preparations required to run e2e tests
function prepare-e2e() {
# (AWS runs detect-project, I don't think we need to anything)
# Note: we can't print anything here, or else the test tools will break with the extra output
return
}