From 1607ac1a7ab7ee6c13ef01ab9141f705600a1145 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Tue, 3 Nov 2015 10:08:46 -0500 Subject: [PATCH 1/6] AWS: use filters in get_igw_id & get_subnet_id Also remove unused get_route_table_id --- cluster/aws/util.sh | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh index 5c68354d179..df9599b1ec1 100755 --- a/cluster/aws/util.sh +++ b/cluster/aws/util.sh @@ -82,15 +82,20 @@ function get_vpc_id { } function get_subnet_id { - python -c "import json,sys; lst = [str(subnet['SubnetId']) for subnet in json.load(sys.stdin)['Subnets'] if subnet['VpcId'] == '$1' and subnet['AvailabilityZone'] == '$2']; print ''.join(lst)" + local vpc_id=$1 + local az=$2 + $AWS_CMD --output text describe-subnets \ + --filters Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \ + Name=availabilityZone,Values=${az} \ + Name=vpc-id,Values=${vpc_id} \ + --query Subnets[].SubnetId } function get_igw_id { - python -c "import json,sys; lst = [str(igw['InternetGatewayId']) for igw in json.load(sys.stdin)['InternetGateways'] for attachment in igw['Attachments'] if attachment['VpcId'] == '$1']; print ''.join(lst)" -} - -function get_route_table_id { - python -c "import json,sys; lst = [str(route_table['RouteTableId']) for route_table in json.load(sys.stdin)['RouteTables'] if route_table['VpcId'] == '$1']; print ''.join(lst)" + local vpc_id=$1 + $AWS_CMD --output text describe-internet-gateways \ + --filters Name=attachment.vpc-id,Values=${vpc_id} \ + --query InternetGateways[].InternetGatewayId } function get_elbs_in_vpc { @@ -713,7 +718,7 @@ function kube-up { create-dhcp-option-set if [[ -z "${SUBNET_ID:-}" ]]; then - SUBNET_ID=$($AWS_CMD describe-subnets --filters Name=tag:KubernetesCluster,Values=${CLUSTER_ID} | get_subnet_id $VPC_ID $ZONE) + SUBNET_ID=$(get_subnet_id $VPC_ID $ZONE) fi if [[ -z "$SUBNET_ID" ]]; then @@ -729,7 +734,7 @@ function kube-up { echo "Using subnet 
$SUBNET_ID" - IGW_ID=$($AWS_CMD describe-internet-gateways | get_igw_id $VPC_ID) + IGW_ID=$(get_igw_id $VPC_ID) if [[ -z "$IGW_ID" ]]; then echo "Creating Internet Gateway." IGW_ID=$($AWS_CMD create-internet-gateway | json_val '["InternetGateway"]["InternetGatewayId"]') From df3897c4aba54fec84d7e4062d55ece3f4324898 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Tue, 3 Nov 2015 10:10:51 -0500 Subject: [PATCH 2/6] Split main kube-up function into sub-functions Makes it a little easier to read, also lets us reuse a master in the next commit without a complex diff. --- cluster/aws/util.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh index df9599b1ec1..72117dfc00b 100755 --- a/cluster/aws/util.sh +++ b/cluster/aws/util.sh @@ -796,6 +796,24 @@ function kube-up { # HTTPS to the master is allowed (for API access) authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 443 --cidr 0.0.0.0/0" + # Create the master + start-master + + # Start minions + start-minions + + # Wait for the master to be ready + wait-master + + # Build ~/.kube/config + build-config + + # Check the cluster is OK + check-cluster +} + +# Starts the master node +function start-master() { # Get or create master persistent volume ensure-master-pd @@ -957,7 +975,10 @@ function kube-up { attempt=$(($attempt+1)) sleep 10 done +} +# Creates an ASG for the minion nodes +function start-minions() { echo "Creating minion configuration" generate-minion-user-data > "${KUBE_TEMP}/minion-user-data" local public_ip_option @@ -1012,7 +1033,10 @@ function kube-up { attempt=$(($attempt+1)) sleep 10 done +} +# Wait for the master to be started +function wait-master() { detect-master > $LOG detect-minions > $LOG @@ -1043,7 +1067,11 @@ function kube-up { done echo "Kubernetes cluster created." 
+} +# Creates the ~/.kube/config file, getting the information from the master +# The master much be running and set in KUBE_MASTER_IP +function build-config() { # TODO use token instead of kube_auth export KUBE_CERT="/tmp/$RANDOM-kubecfg.crt" export KUBE_KEY="/tmp/$RANDOM-kubecfg.key" @@ -1062,7 +1090,10 @@ function kube-up { create-kubeconfig ) +} +# Sanity check the cluster and print confirmation messages +function check-cluster() { echo "Sanity checking cluster..." sleep 5 From d64643fe261394ce918fdd0e270eb60860f512f1 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Tue, 3 Nov 2015 11:01:41 -0500 Subject: [PATCH 3/6] AWS: Experimental support for multiple subnets/AZs in kube-up By setting KUBE_SHARE_MASTER=true we reuse an existing master, rather than creating a new one. By setting KUBE_SUBNET_CIDR=172.20.1.0/24 you can specify the CIDR for a new subnet, avoiding conflicts. Both these options are documented only in kube-up and clearly marked as 'experimental' i.e. likely to change. By combining these, you can kube-up a cluster normally, and then kube-up a cluster in a different AZ, and the new nodes will attach to the same master. KUBE_SHARE_MASTER is also useful for adding a second node auto-scaling-group, for example if you wanted to mix spot & on-demand instances. --- cluster/aws/util.sh | 69 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh index 72117dfc00b..a658c5fab66 100755 --- a/cluster/aws/util.sh +++ b/cluster/aws/util.sh @@ -16,6 +16,19 @@ # A library of helper functions and constant for the local config. +# Experimental flags can be removed/renamed at any time. +# The intent is to allow experimentation/advanced functionality before we +# are ready to commit to supporting it. 
+# Experimental functionality: +# KUBE_SHARE_MASTER=true +# Detects an existing master and reuse it; useful if you want to +# create more nodes, perhaps with a different instance type or in +# a different subnet/AZ +# KUBE_SUBNET_CIDR=172.20.1.0/24 +# Override the default subnet CIDR; useful if you want to create +# a second subnet. The default subnet is 172.20.0.0/24. The VPC +# is created with 172.20.0.0/16; you must pick a sub-CIDR of that. + # Use the config file specified in $KUBE_CONFIG_FILE, or default to # config-default.sh. KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../.. @@ -25,7 +38,9 @@ source "${KUBE_ROOT}/cluster/common.sh" ALLOCATE_NODE_CIDRS=true NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion" -ASG_NAME="${NODE_INSTANCE_PREFIX}-group" + +# The ASG name must be unique, so we include the zone +ASG_NAME="${NODE_INSTANCE_PREFIX}-group-${ZONE}" # We could allow the master disk volume id to be specified in future MASTER_DISK_ID= @@ -53,9 +68,15 @@ AWS_CMD="aws --output json ec2" AWS_ELB_CMD="aws --output json elb" AWS_ASG_CMD="aws --output json autoscaling" -INTERNAL_IP_BASE=172.20.0 +VPC_CIDR_BASE=172.20 MASTER_IP_SUFFIX=.9 -MASTER_INTERNAL_IP=${INTERNAL_IP_BASE}${MASTER_IP_SUFFIX} +MASTER_INTERNAL_IP=${VPC_CIDR_BASE}.0${MASTER_IP_SUFFIX} +VPC_CIDR=${VPC_CIDR_BASE}.0.0/16 +SUBNET_CIDR=${VPC_CIDR_BASE}.0.0/24 +if [[ -n "${KUBE_SUBNET_CIDR:-}" ]]; then + echo "Using subnet CIDR override: ${KUBE_SUBNET_CIDR}" + SUBNET_CIDR=${KUBE_SUBNET_CIDR} +fi MASTER_SG_NAME="kubernetes-master-${CLUSTER_ID}" MINION_SG_NAME="kubernetes-minion-${CLUSTER_ID}" @@ -706,7 +727,7 @@ function kube-up { fi if [[ -z "$VPC_ID" ]]; then echo "Creating vpc." 
- VPC_ID=$($AWS_CMD create-vpc --cidr-block $INTERNAL_IP_BASE.0/16 | json_val '["Vpc"]["VpcId"]') + VPC_ID=$($AWS_CMD create-vpc --cidr-block ${VPC_CIDR} | json_val '["Vpc"]["VpcId"]') $AWS_CMD modify-vpc-attribute --vpc-id $VPC_ID --enable-dns-support '{"Value": true}' > $LOG $AWS_CMD modify-vpc-attribute --vpc-id $VPC_ID --enable-dns-hostnames '{"Value": true}' > $LOG add-tag $VPC_ID Name kubernetes-vpc @@ -723,13 +744,16 @@ function kube-up { if [[ -z "$SUBNET_ID" ]]; then echo "Creating subnet." - SUBNET_ID=$($AWS_CMD create-subnet --cidr-block $INTERNAL_IP_BASE.0/24 --vpc-id $VPC_ID --availability-zone ${ZONE} | json_val '["Subnet"]["SubnetId"]') + SUBNET_ID=$($AWS_CMD create-subnet --cidr-block ${SUBNET_CIDR} --vpc-id $VPC_ID --availability-zone ${ZONE} | json_val '["Subnet"]["SubnetId"]') add-tag $SUBNET_ID KubernetesCluster ${CLUSTER_ID} else EXISTING_CIDR=$($AWS_CMD describe-subnets --subnet-ids ${SUBNET_ID} --query Subnets[].CidrBlock --output text) - echo "Using existing CIDR $EXISTING_CIDR" - INTERNAL_IP_BASE=${EXISTING_CIDR%.*} - MASTER_INTERNAL_IP=${INTERNAL_IP_BASE}${MASTER_IP_SUFFIX} + echo "Using existing subnet with CIDR $EXISTING_CIDR" + VPC_CIDR=$($AWS_CMD describe-vpcs --vpc-ids ${VPC_ID} --query Vpcs[].CidrBlock --output text) + echo "VPC CIDR is $VPC_CIDR" + VPC_CIDR_BASE=${VPC_CIDR%.*.*} + MASTER_INTERNAL_IP=${VPC_CIDR_BASE}.0${MASTER_IP_SUFFIX} + echo "Assuming MASTER_INTERNAL_IP=${MASTER_INTERNAL_IP}" fi echo "Using subnet $SUBNET_ID" @@ -796,17 +820,26 @@ function kube-up { # HTTPS to the master is allowed (for API access) authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 443 --cidr 0.0.0.0/0" - # Create the master - start-master + # KUBE_SHARE_MASTER is used to add minions to an existing master + if [[ "${KUBE_SHARE_MASTER:-}" == "true" ]]; then + # Detect existing master + detect-master - # Start minions - start-minions + # Start minions + start-minions + else + # Create the master + start-master - # Wait for the 
master to be ready - wait-master + # Start minions + start-minions - # Build ~/.kube/config - build-config + # Wait for the master to be ready + wait-master + + # Build ~/.kube/config + build-config + fi # Check the cluster is OK check-cluster @@ -1038,7 +1071,6 @@ function start-minions() { # Wait for the master to be started function wait-master() { detect-master > $LOG - detect-minions > $LOG # TODO(justinsb): This is really not necessary any more # Wait 3 minutes for cluster to come up. We hit it with a "highstate" after that to @@ -1098,6 +1130,8 @@ function check-cluster() { sleep 5 + detect-minions > $LOG + # Don't bail on errors, we want to be able to print some info. set +e @@ -1167,6 +1201,7 @@ function kube-down { echo "Deleting auto-scaling group: ${ASG_NAME}" ${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${ASG_NAME} fi + if [[ -n $(${AWS_ASG_CMD} --output text describe-launch-configurations --launch-configuration-names ${ASG_NAME} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then echo "Deleting auto-scaling launch configuration: ${ASG_NAME}" ${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${ASG_NAME} From cf86ecc6046359b3e84e8bf59590d6527b5f9e7a Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Tue, 3 Nov 2015 11:16:18 -0500 Subject: [PATCH 4/6] AWS: Only match minions in the current ASG i.e. 
don't assume there is only one ASG any more --- cluster/aws/util.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh index a658c5fab66..2ff0d076d94 100755 --- a/cluster/aws/util.sh +++ b/cluster/aws/util.sh @@ -189,6 +189,7 @@ function query-running-minions () { --filters Name=instance-state-name,Values=running \ Name=vpc-id,Values=${VPC_ID} \ Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \ + Name=tag:aws:autoscaling:groupName,Values=${ASG_NAME} \ Name=tag:Role,Values=${MINION_TAG} \ --query ${query} } From 41743ce87a73aee2ec41ac7890e78cf4c37790fa Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Tue, 3 Nov 2015 11:50:50 -0500 Subject: [PATCH 5/6] AWS: Delete ASGs by following instances We can't tag ASGs, but we can see what instances are running in an ASG, and we can match those by our tags. So look for our running instances, and look for the ASGs that created them, and delete those. This can be defeated (most notably if users change the ASG size to 0), but it is safer than other deletion methods. 
--- cluster/aws/util.sh | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh index 2ff0d076d94..04e3fc607cb 100755 --- a/cluster/aws/util.sh +++ b/cluster/aws/util.sh @@ -1198,21 +1198,26 @@ function kube-down { done fi - if [[ -n $(${AWS_ASG_CMD} --output text describe-auto-scaling-groups --auto-scaling-group-names ${ASG_NAME} --query AutoScalingGroups[].AutoScalingGroupName) ]]; then - echo "Deleting auto-scaling group: ${ASG_NAME}" - ${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${ASG_NAME} - fi - - if [[ -n $(${AWS_ASG_CMD} --output text describe-launch-configurations --launch-configuration-names ${ASG_NAME} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then - echo "Deleting auto-scaling launch configuration: ${ASG_NAME}" - ${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${ASG_NAME} - fi - echo "Deleting instances in VPC: ${vpc_id}" instance_ids=$($AWS_CMD --output text describe-instances \ --filters Name=vpc-id,Values=${vpc_id} \ Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \ --query Reservations[].Instances[].InstanceId) + + asg_groups=$($AWS_CMD --output text describe-instances \ + --query 'Reservations[].Instances[].Tags[?Key==`aws:autoscaling:groupName`].Value[]' \ + --instance-ids ${instance_ids}) + for asg_group in ${asg_groups}; do + if [[ -n $(${AWS_ASG_CMD} --output text describe-auto-scaling-groups --auto-scaling-group-names ${asg_group} --query AutoScalingGroups[].AutoScalingGroupName) ]]; then + echo "Deleting auto-scaling group: ${asg_group}" + ${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${asg_group} + fi + if [[ -n $(${AWS_ASG_CMD} --output text describe-launch-configurations --launch-configuration-names ${asg_group} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then + echo "Deleting auto-scaling launch configuration: ${asg_group}" + 
${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${asg_group} + fi + done + if [[ -n "${instance_ids}" ]]; then $AWS_CMD terminate-instances --instance-ids ${instance_ids} > $LOG echo "Waiting for instances to be deleted" From 2d5671aec2d6697856259e3b8c01ded8ea1c24c8 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Tue, 3 Nov 2015 13:12:17 -0500 Subject: [PATCH 6/6] AWS: Fix kube-up comments per code review --- cluster/aws/util.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh index 04e3fc607cb..627b798d452 100755 --- a/cluster/aws/util.sh +++ b/cluster/aws/util.sh @@ -21,7 +21,7 @@ # are ready to commit to supporting it. # Experimental functionality: # KUBE_SHARE_MASTER=true -# Detects an existing master and reuse it; useful if you want to +# Detect and reuse an existing master; useful if you want to # create more nodes, perhaps with a different instance type or in # a different subnet/AZ # KUBE_SUBNET_CIDR=172.20.1.0/24 @@ -39,7 +39,7 @@ ALLOCATE_NODE_CIDRS=true NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion" -# The ASG name must be unique, so we include the zone +# The Auto Scaling Group (ASG) name must be unique, so we include the zone ASG_NAME="${NODE_INSTANCE_PREFIX}-group-${ZONE}" # We could allow the master disk volume id to be specified in future