diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh
index 5c68354d179..627b798d452 100755
--- a/cluster/aws/util.sh
+++ b/cluster/aws/util.sh
@@ -16,6 +16,19 @@
 
 # A library of helper functions and constant for the local config.
 
+# Experimental flags can be removed/renamed at any time.
+# The intent is to allow experimentation/advanced functionality before we
+# are ready to commit to supporting it.
+# Experimental functionality:
+#   KUBE_SHARE_MASTER=true
+#     Detect and reuse an existing master; useful if you want to
+#     create more nodes, perhaps with a different instance type or in
+#     a different subnet/AZ
+#   KUBE_SUBNET_CIDR=172.20.1.0/24
+#     Override the default subnet CIDR; useful if you want to create
+#     a second subnet. The default subnet is 172.20.0.0/24. The VPC
+#     is created with 172.20.0.0/16; you must pick a sub-CIDR of that.
+
 # Use the config file specified in $KUBE_CONFIG_FILE, or default to
 # config-default.sh.
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
@@ -25,7 +38,9 @@ source "${KUBE_ROOT}/cluster/common.sh"
 ALLOCATE_NODE_CIDRS=true
 
 NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
-ASG_NAME="${NODE_INSTANCE_PREFIX}-group"
+
+# The Auto Scaling Group (ASG) name must be unique, so we include the zone
+ASG_NAME="${NODE_INSTANCE_PREFIX}-group-${ZONE}"
 
 # We could allow the master disk volume id to be specified in future
 MASTER_DISK_ID=
@@ -53,9 +68,15 @@ AWS_CMD="aws --output json ec2"
 AWS_ELB_CMD="aws --output json elb"
 AWS_ASG_CMD="aws --output json autoscaling"
 
-INTERNAL_IP_BASE=172.20.0
+VPC_CIDR_BASE=172.20
 MASTER_IP_SUFFIX=.9
-MASTER_INTERNAL_IP=${INTERNAL_IP_BASE}${MASTER_IP_SUFFIX}
+MASTER_INTERNAL_IP=${VPC_CIDR_BASE}.0${MASTER_IP_SUFFIX}
+VPC_CIDR=${VPC_CIDR_BASE}.0.0/16
+SUBNET_CIDR=${VPC_CIDR_BASE}.0.0/24
+if [[ -n "${KUBE_SUBNET_CIDR:-}" ]]; then
+  echo "Using subnet CIDR override: ${KUBE_SUBNET_CIDR}"
+  SUBNET_CIDR=${KUBE_SUBNET_CIDR}
+fi
 
 MASTER_SG_NAME="kubernetes-master-${CLUSTER_ID}"
 MINION_SG_NAME="kubernetes-minion-${CLUSTER_ID}"
@@ -82,15 +103,20 @@ function get_vpc_id {
 }
 
 function get_subnet_id {
-  python -c "import json,sys; lst = [str(subnet['SubnetId']) for subnet in json.load(sys.stdin)['Subnets'] if subnet['VpcId'] == '$1' and subnet['AvailabilityZone'] == '$2']; print ''.join(lst)"
+  local vpc_id=$1
+  local az=$2
+  $AWS_CMD --output text describe-subnets \
+           --filters Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
+                     Name=availabilityZone,Values=${az} \
+                     Name=vpc-id,Values=${vpc_id} \
+           --query Subnets[].SubnetId
 }
 
 function get_igw_id {
-  python -c "import json,sys; lst = [str(igw['InternetGatewayId']) for igw in json.load(sys.stdin)['InternetGateways'] for attachment in igw['Attachments'] if attachment['VpcId'] == '$1']; print ''.join(lst)"
-}
-
-function get_route_table_id {
-  python -c "import json,sys; lst = [str(route_table['RouteTableId']) for route_table in json.load(sys.stdin)['RouteTables'] if route_table['VpcId'] == '$1']; print ''.join(lst)"
+  local vpc_id=$1
+  $AWS_CMD --output text describe-internet-gateways \
+           --filters Name=attachment.vpc-id,Values=${vpc_id} \
+           --query InternetGateways[].InternetGatewayId
 }
 
 function get_elbs_in_vpc {
@@ -163,6 +189,7 @@ function query-running-minions () {
     --filters Name=instance-state-name,Values=running \
               Name=vpc-id,Values=${VPC_ID} \
              Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
+              Name=tag:aws:autoscaling:groupName,Values=${ASG_NAME} \
               Name=tag:Role,Values=${MINION_TAG} \
     --query ${query}
 }
@@ -701,7 +728,7 @@ function kube-up {
   fi
   if [[ -z "$VPC_ID" ]]; then
     echo "Creating vpc."
-    VPC_ID=$($AWS_CMD create-vpc --cidr-block $INTERNAL_IP_BASE.0/16 | json_val '["Vpc"]["VpcId"]')
+    VPC_ID=$($AWS_CMD create-vpc --cidr-block ${VPC_CIDR} | json_val '["Vpc"]["VpcId"]')
     $AWS_CMD modify-vpc-attribute --vpc-id $VPC_ID --enable-dns-support '{"Value": true}' > $LOG
     $AWS_CMD modify-vpc-attribute --vpc-id $VPC_ID --enable-dns-hostnames '{"Value": true}' > $LOG
     add-tag $VPC_ID Name kubernetes-vpc
@@ -713,23 +740,26 @@ function kube-up {
   create-dhcp-option-set
 
   if [[ -z "${SUBNET_ID:-}" ]]; then
-    SUBNET_ID=$($AWS_CMD describe-subnets --filters Name=tag:KubernetesCluster,Values=${CLUSTER_ID} | get_subnet_id $VPC_ID $ZONE)
+    SUBNET_ID=$(get_subnet_id $VPC_ID $ZONE)
   fi
 
   if [[ -z "$SUBNET_ID" ]]; then
     echo "Creating subnet."
-    SUBNET_ID=$($AWS_CMD create-subnet --cidr-block $INTERNAL_IP_BASE.0/24 --vpc-id $VPC_ID --availability-zone ${ZONE} | json_val '["Subnet"]["SubnetId"]')
+    SUBNET_ID=$($AWS_CMD create-subnet --cidr-block ${SUBNET_CIDR} --vpc-id $VPC_ID --availability-zone ${ZONE} | json_val '["Subnet"]["SubnetId"]')
     add-tag $SUBNET_ID KubernetesCluster ${CLUSTER_ID}
   else
     EXISTING_CIDR=$($AWS_CMD describe-subnets --subnet-ids ${SUBNET_ID} --query Subnets[].CidrBlock --output text)
-    echo "Using existing CIDR $EXISTING_CIDR"
-    INTERNAL_IP_BASE=${EXISTING_CIDR%.*}
-    MASTER_INTERNAL_IP=${INTERNAL_IP_BASE}${MASTER_IP_SUFFIX}
+    echo "Using existing subnet with CIDR $EXISTING_CIDR"
+    VPC_CIDR=$($AWS_CMD describe-vpcs --vpc-ids ${VPC_ID} --query Vpcs[].CidrBlock --output text)
+    echo "VPC CIDR is $VPC_CIDR"
+    VPC_CIDR_BASE=${VPC_CIDR%.*.*}
+    MASTER_INTERNAL_IP=${VPC_CIDR_BASE}.0${MASTER_IP_SUFFIX}
+    echo "Assuming MASTER_INTERNAL_IP=${MASTER_INTERNAL_IP}"
   fi
 
   echo "Using subnet $SUBNET_ID"
 
-  IGW_ID=$($AWS_CMD describe-internet-gateways | get_igw_id $VPC_ID)
+  IGW_ID=$(get_igw_id $VPC_ID)
   if [[ -z "$IGW_ID" ]]; then
     echo "Creating Internet Gateway."
     IGW_ID=$($AWS_CMD create-internet-gateway | json_val '["InternetGateway"]["InternetGatewayId"]')
@@ -791,6 +821,33 @@ function kube-up {
 
   # HTTPS to the master is allowed (for API access)
   authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 443 --cidr 0.0.0.0/0"
 
+  # KUBE_SHARE_MASTER is used to add minions to an existing master
+  if [[ "${KUBE_SHARE_MASTER:-}" == "true" ]]; then
+    # Detect existing master
+    detect-master
+
+    # Start minions
+    start-minions
+  else
+    # Create the master
+    start-master
+
+    # Start minions
+    start-minions
+
+    # Wait for the master to be ready
+    wait-master
+
+    # Build ~/.kube/config
+    build-config
+  fi
+
+  # Check the cluster is OK
+  check-cluster
+}
+
+# Starts the master node
+function start-master() {
   # Get or create master persistent volume
   ensure-master-pd
@@ -952,7 +1009,10 @@ function kube-up {
     attempt=$(($attempt+1))
     sleep 10
   done
+}
 
+# Creates an ASG for the minion nodes
+function start-minions() {
   echo "Creating minion configuration"
   generate-minion-user-data > "${KUBE_TEMP}/minion-user-data"
   local public_ip_option
@@ -1007,9 +1067,11 @@ function kube-up {
     attempt=$(($attempt+1))
     sleep 10
   done
+}
 
+# Wait for the master to be started
+function wait-master() {
   detect-master > $LOG
-  detect-minions > $LOG
 
   # TODO(justinsb): This is really not necessary any more
   # Wait 3 minutes for cluster to come up. We hit it with a "highstate" after that to
@@ -1038,7 +1100,11 @@ function kube-up {
   done
 
   echo "Kubernetes cluster created."
+}
 
+# Creates the ~/.kube/config file, getting the information from the master
+# The master must be running, with its address set in KUBE_MASTER_IP
+function build-config() {
   # TODO use token instead of kube_auth
   export KUBE_CERT="/tmp/$RANDOM-kubecfg.crt"
   export KUBE_KEY="/tmp/$RANDOM-kubecfg.key"
@@ -1057,11 +1123,16 @@ function kube-up {
 
     create-kubeconfig
   )
+}
 
+# Sanity check the cluster and print confirmation messages
+function check-cluster() {
   echo "Sanity checking cluster..."
 
   sleep 5
 
+  detect-minions > $LOG
+
   # Don't bail on errors, we want to be able to print some info.
   set +e
@@ -1127,20 +1198,26 @@ function kube-down {
       done
     fi
 
-    if [[ -n $(${AWS_ASG_CMD} --output text describe-auto-scaling-groups --auto-scaling-group-names ${ASG_NAME} --query AutoScalingGroups[].AutoScalingGroupName) ]]; then
-      echo "Deleting auto-scaling group: ${ASG_NAME}"
-      ${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${ASG_NAME}
-    fi
-    if [[ -n $(${AWS_ASG_CMD} --output text describe-launch-configurations --launch-configuration-names ${ASG_NAME} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then
-      echo "Deleting auto-scaling launch configuration: ${ASG_NAME}"
-      ${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${ASG_NAME}
-    fi
-
     echo "Deleting instances in VPC: ${vpc_id}"
     instance_ids=$($AWS_CMD --output text describe-instances \
                             --filters Name=vpc-id,Values=${vpc_id} \
                                       Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
                             --query Reservations[].Instances[].InstanceId)
+
+    asg_groups=$($AWS_CMD --output text describe-instances \
+                          --query 'Reservations[].Instances[].Tags[?Key==`aws:autoscaling:groupName`].Value[]' \
+                          --instance-ids ${instance_ids})
+    for asg_group in ${asg_groups}; do
+      if [[ -n $(${AWS_ASG_CMD} --output text describe-auto-scaling-groups --auto-scaling-group-names ${asg_group} --query AutoScalingGroups[].AutoScalingGroupName) ]]; then
+        echo "Deleting auto-scaling group: ${asg_group}"
+        ${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${asg_group}
+      fi
+      if [[ -n $(${AWS_ASG_CMD} --output text describe-launch-configurations --launch-configuration-names ${asg_group} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then
+        echo "Deleting auto-scaling launch configuration: ${asg_group}"
+        ${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${asg_group}
+      fi
+    done
+
     if [[ -n "${instance_ids}" ]]; then
       $AWS_CMD terminate-instances --instance-ids ${instance_ids} > $LOG
      echo "Waiting for instances to be deleted"
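
Usage sketch (illustrative; not part of the patch): the experimental flags documented at the top of the patch combine to add a second group of minions to a running cluster. KUBE_SHARE_MASTER and KUBE_SUBNET_CIDR come from this change; the other variables (KUBERNETES_PROVIDER, KUBE_AWS_ZONE, NUM_MINIONS) are assumed from cluster/aws/config-default.sh of this era.

  # Bring up the initial cluster: the VPC is created as 172.20.0.0/16,
  # the first subnet as 172.20.0.0/24.
  export KUBERNETES_PROVIDER=aws
  cluster/kube-up.sh

  # Reuse the existing master, adding a second ASG of minions in another
  # AZ on a new sub-CIDR of the VPC; the ASG name stays unique because
  # it now includes the zone.
  KUBE_SHARE_MASTER=true \
  KUBE_SUBNET_CIDR=172.20.1.0/24 \
  KUBE_AWS_ZONE=us-west-2b \
  NUM_MINIONS=2 \
  cluster/kube-up.sh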
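For reference, the rewritten lookup helpers (get_subnet_id, get_igw_id) drop the piped python JSON parsing in favor of server-side --filters plus a JMESPath --query. A standalone equivalent of get_subnet_id, with hypothetical placeholder values for the cluster tag, AZ, and VPC id:

  aws --output text ec2 describe-subnets \
      --filters Name=tag:KubernetesCluster,Values=kubernetes \
                Name=availabilityZone,Values=us-west-2a \
                Name=vpc-id,Values=vpc-12345678 \
      --query Subnets[].SubnetId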