AWS: Use auto-scaling group to run minions
This uses the dynamic CIDR work, and we set source-dest-check to false when we configure the route (which kind of makes sense).
commit 2a5ed2f086 (parent d9dabd749c)
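Background: unless its source-dest-check attribute is disabled, an EC2 instance silently drops packets whose source or destination is not one of its own addresses; routing a pod CIDR to a minion therefore requires turning the check off. A minimal sketch of the equivalent CLI call, with a placeholder instance id:

    # Placeholder instance id; disables source-dest-check so the node can forward pod-CIDR traffic
    aws ec2 modify-instance-attribute \
        --instance-id i-0123456789abcdef0 \
        --source-dest-check '{"Value": false}'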
@@ -38,7 +38,6 @@ IAM_PROFILE_MINION="kubernetes-minion"
 LOG="/dev/null"
 
 MASTER_NAME="${INSTANCE_PREFIX}-master"
-MINION_NAMES=($(eval echo ${INSTANCE_PREFIX}-minion-{1..${NUM_MINIONS}}))
 MASTER_TAG="${INSTANCE_PREFIX}-master"
 MINION_TAG="${INSTANCE_PREFIX}-minion"
 MINION_SCOPES=""
@@ -34,7 +34,6 @@ IAM_PROFILE_MINION="kubernetes-minion"
 LOG="/dev/null"
 
 MASTER_NAME="${INSTANCE_PREFIX}-master"
-MINION_NAMES=($(eval echo ${INSTANCE_PREFIX}-minion-{1..${NUM_MINIONS}}))
 MASTER_TAG="${INSTANCE_PREFIX}-master"
 MINION_TAG="${INSTANCE_PREFIX}-minion"
 MINION_SCOPES=""
@@ -25,7 +25,6 @@ function detect-minion-image() {
 }
 
 function generate-minion-user-data {
-  i=$1
   # We pipe this to the ami as a startup script in the user-data field. Requires a compatible ami
   echo "#! /bin/bash"
   echo "SALT_MASTER='${MASTER_INTERNAL_IP}'"
@@ -37,8 +36,7 @@ function generate-minion-user-data {
 }
 
 function check-minion() {
-  local minion_name=$1
-  local minion_ip=$2
+  local minion_ip=$1
 
   local output=$(ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@$minion_ip sudo docker ps -a 2>/dev/null)
   if [[ -z "${output}" ]]; then
@@ -24,6 +24,9 @@ source "${KUBE_ROOT}/cluster/common.sh"
 
 ALLOCATE_NODE_CIDRS=true
 
+NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
+ASG_NAME="${NODE_INSTANCE_PREFIX}-group"
+
 case "${KUBE_OS_DISTRIBUTION}" in
   ubuntu|wheezy|coreos)
     source "${KUBE_ROOT}/cluster/aws/${KUBE_OS_DISTRIBUTION}/util.sh"
@@ -40,6 +43,7 @@ AWS_REGION=${ZONE%?}
 export AWS_DEFAULT_REGION=${AWS_REGION}
 AWS_CMD="aws --output json ec2"
 AWS_ELB_CMD="aws --output json elb"
+AWS_ASG_CMD="aws --output json autoscaling"
 
 INTERNAL_IP_BASE=172.20.0
 MASTER_IP_SUFFIX=.9
@@ -93,22 +97,20 @@ function expect_instance_states {
   python -c "import json,sys; lst = [str(instance['InstanceId']) for reservation in json.load(sys.stdin)['Reservations'] for instance in reservation['Instances'] if instance['State']['Name'] != '$1']; print ' '.join(lst)"
 }
 
-function get_instance_public_ip {
+function get_instanceid_from_name {
   local tagName=$1
   $AWS_CMD --output text describe-instances \
     --filters Name=tag:Name,Values=${tagName} \
              Name=instance-state-name,Values=running \
              Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
-    --query Reservations[].Instances[].NetworkInterfaces[0].Association.PublicIp
+    --query Reservations[].Instances[].InstanceId
 }
 
-function get_instance_private_ip {
-  local tagName=$1
+function get_instance_public_ip {
+  local instance_id=$1
   $AWS_CMD --output text describe-instances \
-    --filters Name=tag:Name,Values=${tagName} \
-             Name=instance-state-name,Values=running \
-             Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
-    --query Reservations[].Instances[].NetworkInterfaces[0].PrivateIpAddress
+    --instance-ids ${instance_id} \
+    --query Reservations[].Instances[].NetworkInterfaces[0].Association.PublicIp
 }
 
 # Gets a security group id, by name ($1)
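The lookup helpers above are now split: one resolves a tag:Name to an instance id, the other resolves an instance id to its public IP. A sketch of how they compose, with variable names taken from the surrounding script:

    # Resolve the master's Name tag to an instance id, then to its public IP
    master_id=$(get_instanceid_from_name "${MASTER_NAME}")
    master_ip=$(get_instance_public_ip "${master_id}")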
@@ -124,17 +126,49 @@ function get_security_group_id {
 
 function detect-master () {
   KUBE_MASTER=${MASTER_NAME}
-  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
-    KUBE_MASTER_IP=$(get_instance_public_ip $MASTER_NAME)
+  if [[ -z "${KUBE_MASTER_ID-}" ]]; then
+    KUBE_MASTER_ID=$(get_instanceid_from_name ${MASTER_NAME})
   fi
+  if [[ -z "${KUBE_MASTER_ID-}" ]]; then
+    echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'"
+    exit 1
+  fi
   if [[ -z "${KUBE_MASTER_IP-}" ]]; then
-    echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'"
+    KUBE_MASTER_IP=$(get_instance_public_ip ${KUBE_MASTER_ID})
+  fi
+  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
+    echo "Could not detect Kubernetes master node IP. Make sure you've launched a cluster with 'kube-up.sh'"
     exit 1
   fi
   echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP)"
 }
 
+
+function query-running-minions () {
+  local query=$1
+  $AWS_CMD --output text describe-instances \
+           --filters Name=instance-state-name,Values=running \
+                     Name=vpc-id,Values=${VPC_ID} \
+                     Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
+                     Name=tag:Role,Values=${MINION_TAG} \
+           --query ${query}
+}
+
+function find-running-minions () {
+  MINION_IDS=()
+  MINION_NAMES=()
+  for id in $(query-running-minions "Reservations[].Instances[].InstanceId"); do
+    MINION_IDS+=("${id}")
+
+    # We use the minion ids as the name
+    MINION_NAMES+=("${id}")
+  done
+}
+
 function detect-minions () {
+  find-running-minions
+
+  # This is inefficient, but we want MINION_NAMES / MINION_IDS to be ordered the same as KUBE_MINION_IP_ADDRESSES
   KUBE_MINION_IP_ADDRESSES=()
   for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
     local minion_ip
@@ -143,9 +177,10 @@ function detect-minions () {
     else
       minion_ip=$(get_instance_private_ip ${MINION_NAMES[$i]})
     fi
-    echo "Found ${MINION_NAMES[$i]} at ${minion_ip}"
+    echo "Found minion ${i}: ${MINION_NAMES[$i]} @ ${minion_ip}"
     KUBE_MINION_IP_ADDRESSES+=("${minion_ip}")
   done
 
   if [[ -z "$KUBE_MINION_IP_ADDRESSES" ]]; then
     echo "Could not detect Kubernetes minion nodes. Make sure you've launched a cluster with 'kube-up.sh'"
     exit 1
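With static minion names gone, minions are discovered purely by instance state and tags. An illustrative expansion of query-running-minions into a raw CLI call (the variables are assumed to be set by the script):

    aws --output text ec2 describe-instances \
        --filters Name=instance-state-name,Values=running \
                  Name=vpc-id,Values=${VPC_ID} \
                  Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
                  Name=tag:Role,Values=${MINION_TAG} \
        --query Reservations[].Instances[].InstanceId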
@@ -696,7 +731,7 @@ function kube-up {
     echo "cd /var/cache/kubernetes-install"
     echo "readonly SALT_MASTER='${MASTER_INTERNAL_IP}'"
     echo "readonly INSTANCE_PREFIX='${INSTANCE_PREFIX}'"
-    echo "readonly NODE_INSTANCE_PREFIX='${INSTANCE_PREFIX}-minion'"
+    echo "readonly NODE_INSTANCE_PREFIX='${NODE_INSTANCE_PREFIX}'"
     echo "readonly CLUSTER_IP_RANGE='${CLUSTER_IP_RANGE}'"
     echo "readonly ALLOCATE_NODE_CIDRS='${ALLOCATE_NODE_CIDRS}'"
     echo "readonly SERVER_BINARY_TAR_URL='${SERVER_BINARY_TAR_URL}'"
@@ -749,7 +784,7 @@ function kube-up {
 
   while true; do
     echo -n Attempt "$(($attempt+1))" to check for master node
-    local ip=$(get_instance_public_ip $MASTER_NAME)
+    local ip=$(get_instance_public_ip ${master_id})
     if [[ -z "${ip}" ]]; then
       if (( attempt > 30 )); then
         echo
@@ -827,62 +862,65 @@ function kube-up {
     sleep 10
   done
 
-  MINION_IDS=()
-  for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
-    echo "Starting Minion (${MINION_NAMES[$i]})"
-    generate-minion-user-data $i > "${KUBE_TEMP}/minion-user-data-${i}"
-
-    local public_ip_option
-    if [[ "${ENABLE_MINION_PUBLIC_IP}" == "true" ]]; then
-      public_ip_option="--associate-public-ip-address"
-    else
-      public_ip_option="--no-associate-public-ip-address"
-    fi
-
-    minion_id=$($AWS_CMD run-instances \
+  echo "Creating minion configuration"
+  generate-minion-user-data > "${KUBE_TEMP}/minion-user-data"
+  local public_ip_option
+  if [[ "${ENABLE_MINION_PUBLIC_IP}" == "true" ]]; then
+    public_ip_option="--associate-public-ip-address"
+  else
+    public_ip_option="--no-associate-public-ip-address"
+  fi
+  ${AWS_ASG_CMD} create-launch-configuration \
+      --launch-configuration-name ${ASG_NAME} \
       --image-id $KUBE_MINION_IMAGE \
-      --iam-instance-profile Name=$IAM_PROFILE_MINION \
+      --iam-instance-profile ${IAM_PROFILE_MINION} \
       --instance-type $MINION_SIZE \
-      --subnet-id $SUBNET_ID \
-      --private-ip-address $INTERNAL_IP_BASE.1${i} \
      --key-name ${AWS_SSH_KEY_NAME} \
-      --security-group-ids ${MINION_SG_ID} \
+      --security-groups ${MINION_SG_ID} \
      ${public_ip_option} \
      --block-device-mappings "${BLOCK_DEVICE_MAPPINGS}" \
-      --user-data "file://${KUBE_TEMP}/minion-user-data-${i}" | json_val '["Instances"][0]["InstanceId"]')
+      --user-data "file://${KUBE_TEMP}/minion-user-data"
 
-    add-tag $minion_id Name ${MINION_NAMES[$i]}
-    add-tag $minion_id Role $MINION_TAG
-    add-tag $minion_id KubernetesCluster ${CLUSTER_ID}
+  echo "Creating autoscaling group"
+  ${AWS_ASG_CMD} create-auto-scaling-group \
+      --auto-scaling-group-name ${ASG_NAME} \
+      --launch-configuration-name ${ASG_NAME} \
+      --min-size ${NUM_MINIONS} \
+      --max-size ${NUM_MINIONS} \
+      --vpc-zone-identifier ${SUBNET_ID} \
+      --tags ResourceId=${ASG_NAME},ResourceType=auto-scaling-group,Key=Name,Value=${NODE_INSTANCE_PREFIX} \
+             ResourceId=${ASG_NAME},ResourceType=auto-scaling-group,Key=Role,Value=${MINION_TAG} \
+             ResourceId=${ASG_NAME},ResourceType=auto-scaling-group,Key=KubernetesCluster,Value=${CLUSTER_ID}
 
-    MINION_IDS[$i]=$minion_id
-  done
+  # Wait for the minions to be running
+  # TODO(justinsb): This is really not needed any more
+  attempt=0
+  while true; do
+    find-running-minions > $LOG
+    if [[ ${#MINION_IDS[@]} == ${NUM_MINIONS} ]]; then
+      echo -e " ${color_green}${#MINION_IDS[@]} minions started; ready${color_norm}"
+      break
+    fi
 
-  # Configure minion networking
-  # TODO(justinsb): Check if we can change source-dest-check before instance fully running
-  for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
-    # We are not able to add a route to the instance until that instance is in "running" state.
-    # This is quite an ugly solution to this problem. In Bash 4 we could use assoc. arrays to do this for
-    # all instances at once but we can't be sure we are running Bash 4.
-    minion_id=${MINION_IDS[$i]}
-    wait-for-instance-running $minion_id
-    echo "Minion ${MINION_NAMES[$i]} running"
+    if (( attempt > 30 )); then
+      echo
+      echo "Expected number of minions did not start in time"
+      echo
+      echo -e "${color_red}Expected number of minions failed to start. Your cluster is unlikely" >&2
+      echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
+      echo -e "cluster. (sorry!)${color_norm}" >&2
+      exit 1
+    fi
 
-    $AWS_CMD modify-instance-attribute --instance-id $minion_id --source-dest-check '{"Value": false}' > $LOG
-  done
+    echo -e " ${color_yellow}${#MINION_IDS[@]} minions started; waiting${color_norm}"
+    attempt=$(($attempt+1))
+    sleep 10
+  done
 
   FAIL=0
   for job in `jobs -p`; do
     wait $job || let "FAIL+=1"
   done
   if (( $FAIL != 0 )); then
     echo "${FAIL} commands failed. Exiting."
     exit 2
   fi
 
   detect-master > $LOG
   detect-minions > $LOG
 
+  # TODO(justinsb): This is really not necessary any more
   # Wait 3 minutes for cluster to come up. We hit it with a "highstate" after that to
   # make sure that everything is well configured.
   # TODO: Can we poll here?
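The per-minion run-instances/add-tag loop is replaced by a single launch configuration plus an auto-scaling group with min-size equal to max-size equal to NUM_MINIONS, with tags applied at the group level. One hedged way to confirm the group reached capacity (this query is illustrative, not part of the commit):

    aws --output text autoscaling describe-auto-scaling-groups \
        --auto-scaling-group-names ${ASG_NAME} \
        --query AutoScalingGroups[].Instances[].InstanceId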
@@ -937,15 +975,15 @@ function kube-up {
   set +e
 
   # Basic sanity checking
+  # TODO(justinsb): This is really not needed any more
   local rc # Capture return code without exiting because of errexit bash option
-  for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
+  for (( i=0; i<${#KUBE_MINION_IP_ADDRESSES[@]}; i++)); do
     # Make sure docker is installed and working.
     local attempt=0
     while true; do
-      local minion_name=${MINION_NAMES[$i]}
       local minion_ip=${KUBE_MINION_IP_ADDRESSES[$i]}
-      echo -n Attempt "$(($attempt+1))" to check Docker on node "${minion_name} @ ${minion_ip}" ...
-      local output=`check-minion ${minion_name} ${minion_ip}`
+      echo -n "Attempt $(($attempt+1)) to check Docker on node @ ${minion_ip} ..."
+      local output=`check-minion ${minion_ip}`
       echo $output
       if [[ "${output}" != "working" ]]; then
         if (( attempt > 9 )); then
@@ -996,6 +1034,15 @@ function kube-down {
     done
   fi
 
+  if [[ -n $(${AWS_ASG_CMD} --output text describe-auto-scaling-groups --auto-scaling-group-names ${ASG_NAME} --query AutoScalingGroups[].AutoScalingGroupName) ]]; then
+    echo "Deleting auto-scaling group: ${ASG_NAME}"
+    ${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${ASG_NAME}
+  fi
+  if [[ -n $(${AWS_ASG_CMD} --output text describe-launch-configurations --launch-configuration-names ${ASG_NAME} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then
+    echo "Deleting auto-scaling launch configuration: ${ASG_NAME}"
+    ${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${ASG_NAME}
+  fi
+
   echo "Deleting instances in VPC: ${vpc_id}"
   instance_ids=$($AWS_CMD --output text describe-instances \
                           --filters Name=vpc-id,Values=${vpc_id} \
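Teardown note: the auto-scaling group is deleted (with --force-delete) before the VPC's instances are enumerated for termination; terminating instances first would only prompt the still-live group to replace them.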
@@ -1170,6 +1217,14 @@ function ssh-to-node {
   local node="$1"
   local cmd="$2"
 
+  if [[ "${node}" == "${MASTER_NAME}" ]]; then
+    node=$(get_instanceid_from_name ${MASTER_NAME})
+    if [[ -z "${node-}" ]]; then
+      echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'"
+      exit 1
+    fi
+  fi
+
   local ip=$(get_instance_public_ip ${node})
   if [[ -z "$ip" ]]; then
     echo "Could not detect IP for ${node}."
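Since minions no longer have predictable names, ssh-to-node now takes an instance id directly, while the master's name is still resolved for convenience. Hypothetical invocations (the command and minion id are placeholders):

    ssh-to-node "${MASTER_NAME}" "uptime"      # name resolved to an instance id
    ssh-to-node i-0123456789abcdef0 "uptime"   # minion addressed by instance id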
@@ -90,6 +90,8 @@ type EC2 interface {
 	DescribeRouteTables(request *ec2.DescribeRouteTablesInput) ([]*ec2.RouteTable, error)
 	CreateRoute(request *ec2.CreateRouteInput) (*ec2.CreateRouteOutput, error)
 	DeleteRoute(request *ec2.DeleteRouteInput) (*ec2.DeleteRouteOutput, error)
+
+	ModifyInstanceAttribute(request *ec2.ModifyInstanceAttributeInput) (*ec2.ModifyInstanceAttributeOutput, error)
 }
 
 // This is a simple pass-through of the ELB client interface, which allows for testing
@@ -414,6 +416,10 @@ func (s *awsSdkEC2) DeleteRoute(request *ec2.DeleteRouteInput) (*ec2.DeleteRoute
 	return s.ec2.DeleteRoute(request)
 }
 
+func (s *awsSdkEC2) ModifyInstanceAttribute(request *ec2.ModifyInstanceAttributeInput) (*ec2.ModifyInstanceAttributeOutput, error) {
+	return s.ec2.ModifyInstanceAttribute(request)
+}
+
 func init() {
 	cloudprovider.RegisterCloudProvider(ProviderName, func(config io.Reader) (cloudprovider.Interface, error) {
 		creds := credentials.NewChainCredentials(
@@ -69,9 +69,29 @@ func (s *AWSCloud) ListRoutes(clusterName string) ([]*cloudprovider.Route, error
 	return routes, nil
 }
 
+// Sets the instance attribute "source-dest-check" to the specified value
+func (s *AWSCloud) configureInstanceSourceDestCheck(instanceID string, sourceDestCheck bool) error {
+	request := &ec2.ModifyInstanceAttributeInput{}
+	request.InstanceID = aws.String(instanceID)
+	request.SourceDestCheck = &ec2.AttributeBooleanValue{Value: aws.Boolean(sourceDestCheck)}
+
+	_, err := s.ec2.ModifyInstanceAttribute(request)
+	if err != nil {
+		return fmt.Errorf("error configuring source-dest-check on instance %s: %v", instanceID, err)
+	}
+	return nil
+}
+
 // CreateRoute implements Routes.CreateRoute
 // Create the described route
 func (s *AWSCloud) CreateRoute(clusterName string, nameHint string, route *cloudprovider.Route) error {
+	// In addition to configuring the route itself, we also need to configure the instance to accept that traffic
+	// On AWS, this requires turning source-dest checks off
+	err := s.configureInstanceSourceDestCheck(route.TargetInstance, false)
+	if err != nil {
+		return err
+	}
+
 	table, err := s.findRouteTable(clusterName)
 	if err != nil {
 		return err
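With this change, the one-off modify-instance-attribute call that kube-up used to make is owned by the cloud provider instead: CreateRoute disables source-dest-check on the target instance before installing the route, so any instance that is assigned a pod CIDR is able to forward that traffic.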
@@ -372,6 +372,10 @@ func (s *FakeEC2) DeleteRoute(request *ec2.DeleteRouteInput) (*ec2.DeleteRouteOu
 	panic("Not implemented")
 }
 
+func (s *FakeEC2) ModifyInstanceAttribute(request *ec2.ModifyInstanceAttributeInput) (*ec2.ModifyInstanceAttributeOutput, error) {
+	panic("Not implemented")
+}
+
 type FakeELB struct {
 	aws *FakeAWSServices
 }