Add AWS build support

Signed-off-by: Nathan LeClaire <nathan.leclaire@gmail.com>
Author: Nathan LeClaire
Date:   2016-07-14 15:59:47 -07:00
parent 0a01b10b1a
commit abb968c538
18 changed files with 694 additions and 262 deletions


@@ -0,0 +1,18 @@
FROM alpine
RUN apk add --update \
python \
py-pip \
bash \
curl \
e2fsprogs \
jq \
syslinux
RUN pip install -U awscli
RUN mkdir -p /scripts
WORKDIR /scripts
COPY ./build-common.sh .
COPY ./aws/common.sh .
COPY ./aws/bake-ami.sh .
ENTRYPOINT ["./bake-ami.sh"]
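The `make ami` target described in the README drives this image; a rough manual invocation might look like the sketch below. The image tag, the `/mnt` mount of the build output, and the `--privileged` flag (needed because `bake-ami.sh` attaches and formats an EBS device from inside the container) are assumptions, not the exact Makefile commands.

```console
$ docker build -t moby-ami-builder .
$ docker run --rm --privileged \
    -v ~/.aws:/root/.aws \
    -v $(pwd):/mnt \
    moby-ami-builder clean
$ docker run --rm --privileged \
    -v ~/.aws:/root/.aws \
    -v $(pwd):/mnt \
    moby-ami-builder bake
```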

alpine/cloud/aws/.gitignore vendored Normal file

@@ -0,0 +1 @@
*.out


@@ -0,0 +1,70 @@
# Compile Moby for AWS (Amazon Machine Image)
#### Requirements
To compile, the requirements are:
1. Must be working on an EC2 instance
2. Must have `docker` and `docker-compose` installed
3. Must have configured AWS credentials on the instance (`aws configure`)
   (the build will mount `~/.aws` into the build container)
#### Building
To bake the AMI, run the following from inside the `alpine/` subdirectory of
the main Moby repo:
```console
$ make ami
```
This will:
1. Clean up any remaining artifacts of old AMI builds
2. Create a new EBS volume and attach it to the build instance
3. Format and partition the volume for installation of Linux
4. Set up the boot artifacts (`initrd.img` and `vmlinuz64`) inside the new partition
5. Install the syslinux MBR to the device so that it boots
6. Take a snapshot of the EBS volume with Moby installed
7. Turn the snapshot into an AMI
#### Testing
Once the AMI has been created, a file `aws/ami_id.out` will be written which
contains its ID.
You can boot a small AWS instance from this AMI using the `aws/run-instance.sh`
script.
There is no SSH available today, but inbound access on the Docker API should
work if you configure a proper security group and attach it to the instance.
For instance, allow inbound access on port `2375`, and a command such as the
following from your build instance should get you a "root-like" shell:
```console
$ docker -H 172.31.2.176:2375 \
run -ti \
--privileged \
--pid host \
debian \
nsenter -t 1 -m
```
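The ingress rule itself can be added with the AWS CLI; this mirrors what
`aws/run-instance.sh` does for the Docker API port (the group ID and CIDR
below are placeholders):
```console
$ aws ec2 authorize-security-group-ingress \
    --group-id sg-xxxxxxxx \
    --protocol tcp \
    --port 2375 \
    --cidr 172.31.0.0/16
```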
Alternatively, you can also have the `aws/run-instance.sh` script create a
security group and Swarm for you automatically (including worker/agent
instances to join the cluster).
To do so, set the `JOIN_INSTANCES` environment variable to any value, and
specify how many "joiners" (worker nodes) you want to also spin up using the
`JOINERS_COUNT` environment variable (the default is 1), e.g.:
```console
$ JOIN_INSTANCES=1 JOINERS_COUNT=3 ./aws/run-instance.sh
```
This will give you a 4-node cluster with a manager named
`docker-swarm-manager`, and workers named `docker-swarm-joiner-0`,
`docker-swarm-joiner-1`, and so on.


@@ -0,0 +1,52 @@
#!/bin/bash
# Usage: ./cloud/aws/approve-account.sh [ACCOUNT_ID] [AMI_FILE]
#
# ACCOUNT_ID must be a valid AWS account ID
#
# AMI_FILE must be a newline-delimited file of "<region> <AMI ID>" pairs (the
# format written by copy-amis.sh) giving the AMIs to approve launch
# permissions for in each region, e.g.:
#
# us-west-1 ami-xxxxxxx
# us-east-1 ami-yyyyyyy
set -e
source "cloud/build-common.sh"
source "cloud/aws/common.sh"
USER_ID="$1"
if [ ${#USER_ID} -lt 12 ]
then
# Pad zeros in front so it will always be 12 chars long, e.g. some AWS
# accounts have ID like '123123123123' and others like '000123123123'
USER_ID_PADDED=$(printf "%0$((12-${#USER_ID}))d%s" 0 ${USER_ID})
else
USER_ID_PADDED="${USER_ID}"
fi
AMI_FILE="$2"
if [ ! -f ${AMI_FILE} ]
then
errecho "AMI file not found."
exit 1
fi
while read REGION_AMI_ID
do
REGION=$(echo ${REGION_AMI_ID} | cut -d' ' -f 1)
IMAGE_ID=$(echo ${REGION_AMI_ID} | cut -d' ' -f 2)
arrowecho "Approving launch for ${IMAGE_ID} in ${REGION}"
aws ec2 modify-image-attribute \
--region ${REGION} \
--image-id ${IMAGE_ID} \
--launch-permission "{
\"Add\": [{
\"UserId\": \"${USER_ID_PADDED}\"
}]
}"
done <${AMI_FILE}
arrowecho "Done approving account ${USER_ID_PADDED}"

alpine/cloud/aws/bake-ami.sh Executable file

@@ -0,0 +1,134 @@
#!/bin/bash
# Script to automate creation and snapshotting of a Moby AMI. Currently, it's
# intended to be invoked from an instance running in the same region as the
# target AMI will be in, since it directly mounts the created EBS volume as a
# device on this running instance.
set -e
declare -xr PROVIDER="aws"
source "./build-common.sh"
source "${MOBY_SRC_ROOT}/cloud/aws/common.sh"
# TODO(nathanleclaire): This could be calculated dynamically to avoid conflicts.
EBS_DEVICE=/dev/xvdb
bake_image()
{
# Create a new EBS volume. We will format this volume to boot into Moby
# initrd via syslinux in MBR. That formatted drive can then be snapshotted
# and turned into an AMI.
VOLUME_ID=$(aws ec2 create-volume \
--size 20 \
--availability-zone $(current_instance_az) | jq -r .VolumeId)
tag ${VOLUME_ID}
aws ec2 wait volume-available --volume-ids ${VOLUME_ID}
arrowecho "Attaching volume"
aws ec2 attach-volume \
--volume-id ${VOLUME_ID} \
--device ${EBS_DEVICE} \
--instance-id $(current_instance_id) >/dev/null
aws ec2 wait volume-in-use --volume-ids ${VOLUME_ID}
format_on_device "${EBS_DEVICE}"
configure_syslinux_on_device_partition "${EBS_DEVICE}" "${EBS_DEVICE}1"
arrowecho "Taking snapshot!"
# Take a snapshot of the volume we wrote to.
SNAPSHOT_ID=$(aws ec2 create-snapshot \
--volume-id ${VOLUME_ID} \
--description "Snapshot of Moby device for AMI baking" | jq -r .SnapshotId)
tag ${SNAPSHOT_ID}
arrowecho "Waiting for snapshot completion"
aws ec2 wait snapshot-completed --snapshot-ids ${SNAPSHOT_ID}
# Convert that snapshot into an AMI as the root device.
IMAGE_ID=$(aws ec2 register-image \
--name "${IMAGE_NAME}" \
--description "${IMAGE_DESCRIPTION}" \
--architecture x86_64 \
--root-device-name "${EBS_DEVICE}" \
--virtualization-type "hvm" \
--block-device-mappings "[
{
\"DeviceName\": \"${EBS_DEVICE}\",
\"Ebs\": {
\"SnapshotId\": \"${SNAPSHOT_ID}\"
}
}
]" | jq -r .ImageId)
tag ${IMAGE_ID}
# Boom, now you (should) have a Moby AMI.
arrowecho "Created AMI: ${IMAGE_ID}"
echo "${IMAGE_ID}" >"${MOBY_SRC_ROOT}/cloud/aws/ami_id.out"
}
clean_tagged_resources()
{
if [ -d "${MOBY_SRC_ROOT}/moby" ]
then
rm -rf "${MOBY_SRC_ROOT}/moby"
fi
VOLUME_ID=$(aws ec2 describe-volumes --filters "Name=tag-key,Values=$1" | jq -r .Volumes[0].VolumeId)
if [ ${VOLUME_ID} == "null" ]
then
arrowecho "No volume found, skipping"
else
arrowecho "Detaching volume"
aws ec2 detach-volume --volume-id ${VOLUME_ID} >/dev/null || errecho "WARN: Error detaching volume!"
aws ec2 wait volume-available --volume-ids ${VOLUME_ID}
arrowecho "Deleting volume"
aws ec2 delete-volume --volume-id ${VOLUME_ID} >/dev/null
fi
IMAGE_ID=$(aws ec2 describe-images --filters "Name=tag-key,Values=$1" | jq -r .Images[0].ImageId)
if [ ${IMAGE_ID} == "null" ]
then
arrowecho "No image found, skipping"
else
arrowecho "Deregistering previously baked AMI"
# Sometimes describe-images does not return null even if the found
# image cannot be deregistered
#
# TODO(nathanleclaire): More elegant solution?
aws ec2 deregister-image --image-id ${IMAGE_ID} >/dev/null || errecho "WARN: Issue deregistering previously tagged image!"
fi
SNAPSHOT_ID=$(aws ec2 describe-snapshots --filters "Name=tag-key,Values=$1" | jq -r .Snapshots[0].SnapshotId)
if [ ${SNAPSHOT_ID} == "null" ]
then
arrowecho "No snapshot found, skipping"
else
arrowecho "Deleting volume snapshot"
aws ec2 delete-snapshot --snapshot-id ${SNAPSHOT_ID}
fi
}
case "$1" in
bake)
bake_image
;;
clean)
arrowecho "Cleaning resources from previous build tag if applicable..."
clean_tagged_resources "${TAG_KEY_PREV}"
arrowecho "Cleaning resources from current build tag if applicable..."
clean_tagged_resources "${TAG_KEY}"
;;
*)
echo "Command $1 not found. Usage: ./bake-ami.sh [bake|clean]"
esac


@@ -0,0 +1,35 @@
#!/bin/bash
set -e
# When changing this value, be sure to change TAG_KEY_PREV to its old value.
TAG_KEY="aws-1.12.0-rc3-beta-3-dev"
TAG_KEY_PREV="aws-1.12.0-rc3-beta-2-dev"
INSTANCE_ENDPOINT="http://169.254.169.254/latest"
INSTANCE_METADATA_API_ENDPOINT="${INSTANCE_ENDPOINT}/meta-data/"
IMAGE_NAME=${IMAGE_NAME:-"Moby Linux ${TAG_KEY}"}
IMAGE_DESCRIPTION=${IMAGE_DESCRIPTION:-"The best OS for running Docker, version ${TAG_KEY}"}
current_instance_region()
{
curl -s "${INSTANCE_ENDPOINT}/dynamic/instance-identity/document" | jq .region -r
}
current_instance_az()
{
curl -s "${INSTANCE_METADATA_API_ENDPOINT}/placement/availability-zone"
}
current_instance_id()
{
curl -s "${INSTANCE_METADATA_API_ENDPOINT}/instance-id"
}
# We tag resources created as part of the build to ensure that they can be
# cleaned up later.
tag()
{
arrowecho "Tagging $1"
aws ec2 create-tags --resources "$1" --tags "Key=${TAG_KEY},Value=" >/dev/null
}
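Because each resource gets an empty-valued tag whose key is `TAG_KEY`,
leftovers from a build can later be found purely by filtering on that tag key;
this is how `bake-ami.sh` locates resources to clean up. For example:
```console
$ aws ec2 describe-volumes --filters "Name=tag-key,Values=aws-1.12.0-rc3-beta-3-dev"
$ aws ec2 describe-snapshots --filters "Name=tag-key,Values=aws-1.12.0-rc3-beta-3-dev"
$ aws ec2 describe-images --filters "Name=tag-key,Values=aws-1.12.0-rc3-beta-3-dev"
```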

alpine/cloud/aws/copy-amis.sh Executable file

@@ -0,0 +1,89 @@
#!/bin/bash
# Usage: ./cloud/aws/copy-amis.sh
# Assumptions:
# - A finished Moby AMI ID has been deposited in ./cloud/aws/ami_id.out. (This is
# the behavior of the ./cloud/aws/bake-ami.sh script)
# - (recommended) IMAGE_NAME is set to a custom value (e.g., "Moby Linux")
# - (recommended) IMAGE_DESCRIPTION is set to a custom value (e.g., "1.12-0-tp1")
#
#
# Outputs:
# - A file of newline delimited AMI IDs representing the AMI for each region.
# - A file containing a subsection of a CloudFormation template outlining these AMIs (JSON).
set -e
source "cloud/build-common.sh"
source "cloud/aws/common.sh"
SOURCE_AMI_ID=$(cat ./cloud/aws/ami_id.out)
# To have a list of just the IDs (approve accounts later if desired)
AMIS_IDS_DEST="./cloud/aws/copied_image_regions_${SOURCE_AMI_ID}.out"
# File to drop the (mostly correct) CF template section in
CF_TEMPLATE="./cloud/aws/cf_image_regions_${SOURCE_AMI_ID}.out"
cfecho()
{
echo "$@" >>${CF_TEMPLATE}
}
cfprintf()
{
printf "$@" >>${CF_TEMPLATE}
}
if [ -f ${AMIS_IDS_DEST} ]
then
rm ${AMIS_IDS_DEST}
fi
if [ -f ${CF_TEMPLATE} ]
then
rm ${CF_TEMPLATE}
fi
cfecho '"AWSRegionArch2AMI": {'
REGIONS=(us-west-1 us-west-2 us-east-1 eu-west-1 eu-central-1 ap-southeast-1 ap-northeast-1 ap-southeast-2 ap-northeast-2 sa-east-1)
for REGION in ${REGIONS[@]}
do
REGION_AMI_ID=$(aws ec2 copy-image \
--source-region $(current_instance_region) \
--source-image-id "${SOURCE_AMI_ID}" \
--region "${REGION}" \
--name "${IMAGE_NAME}" \
--description "${IMAGE_DESCRIPTION}" | jq -r .ImageId)
echo "${REGION_AMI_ID}"
echo "${REGION} ${REGION_AMI_ID}" >>${AMIS_IDS_DEST}
cfprintf " \"${REGION}\": {
\"HVM64\": \"${REGION_AMI_ID}\",
\"HVMG2\": \"NOT_SUPPORTED\"
}"
# Emit valid JSON. No trailing comma on last element.
#
# TODO: I'm pretty sure this negative index is a Bash-ism, and a Bash-ism
# from recent versions at that.
if [ ${REGION} != ${REGIONS[-1]} ]
then
cfecho ","
else
cfecho
fi
done
cfecho "}"
echo "All done. The results for adding to CloudFormation can be"
echo "viewed here:"
arrowecho ${CF_TEMPLATE}
echo
echo "The plain list of AMIs can be viewed here:"
arrowecho ${AMIS_IDS_DEST}
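For two copied regions, the generated CloudFormation fragment would look
roughly like this (AMI IDs are placeholders; whitespace may differ slightly):
```console
$ cat ./cloud/aws/cf_image_regions_ami-xxxxxxx.out
"AWSRegionArch2AMI": {
    "us-west-1": {
        "HVM64": "ami-aaaaaaa",
        "HVMG2": "NOT_SUPPORTED"
    },
    "us-west-2": {
        "HVM64": "ami-bbbbbbb",
        "HVMG2": "NOT_SUPPORTED"
    }
}
```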


@@ -0,0 +1,9 @@
#!/bin/sh
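# Run the given command and append its output to the swarm log, prefixing
# every line with a timestamp via awk.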
logcmd()
{
"$@" 2>&1 | awk -v timestamp="$(date) " '$0=timestamp$0' >>/var/log/docker-swarm.log
}
logcmd docker swarm join {{MANAGER_IP}}:4500
logcmd docker swarm info


@@ -0,0 +1,15 @@
#!/bin/sh
METADATA=http://169.254.169.254/latest/meta-data
# TODO: This dial retry loop should be handled by openrc maybe? (or by docker
# service)
docker swarm init \
--secret "" \
--auto-accept manager \
--auto-accept worker \
--listen-addr $(wget -qO- ${METADATA}/local-ipv4 | sed 's/http:\/\///'):4500 \
>>/var/log/docker-swarm.log 2>&1
exit 0
exit 1

alpine/cloud/aws/run-instance.sh Executable file

@@ -0,0 +1,138 @@
#!/bin/bash
# Quick script to boot an instance from generated AMI. Intended to be invoked
# from "alpine" directory.
set -e
JOINERS_COUNT=${JOINERS_COUNT:-1}
METADATA="http://169.254.169.254/latest/meta-data"
MANAGER_SG="docker-swarm-ingress"
manager_sg_id()
{
aws ec2 describe-security-groups \
--filter Name=group-name,Values=${MANAGER_SG} | jq -r .SecurityGroups[0].GroupId
}
attach_security_group()
{
MANAGER_SG_ID=$(manager_sg_id)
if [ ${MANAGER_SG_ID} == "null" ]
then
CUR_INSTANCE_MAC=$(wget -qO- ${METADATA}/network/interfaces/macs)
CUR_INSTANCE_VPC_CIDR=$(wget -qO- ${METADATA}/network/interfaces/macs/${CUR_INSTANCE_MAC}vpc-ipv4-cidr-block)
MANAGER_SG_ID=$(aws ec2 create-security-group \
--group-name ${MANAGER_SG} \
--description "Allow inbound access to Docker API and for remote join node connection" | jq -r .GroupId)
echo "Created security group ${MANAGER_SG_ID}"
# Hack to wait for SG to be created before adding rules
sleep 5
# For Docker API
aws ec2 authorize-security-group-ingress \
--group-id ${MANAGER_SG_ID} \
--protocol tcp \
--port 2375 \
--cidr ${CUR_INSTANCE_VPC_CIDR}
# For Swarm join node connection
aws ec2 authorize-security-group-ingress \
--group-id ${MANAGER_SG_ID} \
--protocol tcp \
--port 4500 \
--cidr ${CUR_INSTANCE_VPC_CIDR}
fi
aws ec2 modify-instance-attribute \
--instance-id "$1" \
--groups ${MANAGER_SG_ID}
}
poll_instance_log()
{
echo "Waiting for instance boot log to become available"
INSTANCE_BOOT_LOG="null"
while [ ${INSTANCE_BOOT_LOG} == "null" ]
do
INSTANCE_BOOT_LOG=$(aws ec2 get-console-output --instance-id "$1" | jq -r .Output)
sleep 5
done
aws ec2 get-console-output --instance-id "$1" | jq -r .Output
}
OLD_INSTANCE_IDS=$(cat ./cloud/aws/instance_id.out | tr '\n' ' ')
aws ec2 terminate-instances --instance-id ${OLD_INSTANCE_IDS} || true
if [ ! -f ./cloud/aws/ami_id.out ]
then
echo "AMI ID to launch instance from not found"
exit 1
fi
AMI_ID=$(cat ./cloud/aws/ami_id.out)
echo "Using image ${AMI_ID}"
MANAGER_INSTANCE_ID=$(aws ec2 run-instances \
--image-id ${AMI_ID} \
--instance-type t2.micro \
--user-data file://./cloud/aws/manager-user-data.sh | jq -r .Instances[0].InstanceId)
aws ec2 create-tags --resources ${MANAGER_INSTANCE_ID} --tags Key=Name,Value=$(whoami)-docker-swarm-manager
echo "Running manager instance ${MANAGER_INSTANCE_ID}"
# Deliberately truncate file here.
echo ${MANAGER_INSTANCE_ID} >./cloud/aws/instance_id.out
attach_security_group ${MANAGER_INSTANCE_ID}
# User can set this variable to indicate they want a whole swarm.
if [ ! -z "$JOIN_INSTANCES" ]
then
MANAGER_IP=$(aws ec2 describe-instances \
--instance-id ${MANAGER_INSTANCE_ID} | jq -r .Reservations[0].Instances[0].NetworkInterfaces[0].PrivateIpAddresses[0].PrivateIpAddress)
TMP_JOINER_USERDATA=/tmp/joiner-user-data-${MANAGER_INSTANCE_ID}.sh
cat ./cloud/aws/joiner-user-data.sh | sed "s/{{MANAGER_IP}}/${MANAGER_IP}/" >${TMP_JOINER_USERDATA}
JOINER_INSTANCE_IDS=$(aws ec2 run-instances \
--image-id ${AMI_ID} \
--instance-type t2.micro \
--count ${JOINERS_COUNT} \
--user-data file://${TMP_JOINER_USERDATA} | jq -r .Instances[].InstanceId)
echo "Joining nodes:" ${JOINER_INSTANCE_IDS}
NODE_NUMBER=0
for ID in ${JOINER_INSTANCE_IDS}
do
echo "Tagging joiner instance #${NODE_NUMBER}: ${ID}"
# For debugging purposes only. In "production" this SG should not be
# attached to these instances.
attach_security_group ${ID}
# Do not truncate file here.
echo ${ID} >>./cloud/aws/instance_id.out
# TODO: Get list of ids and do this for each if applicable.
aws ec2 create-tags --resources ${ID} --tags Key=Name,Value=$(whoami)-docker-swarm-joiner-${NODE_NUMBER}
NODE_NUMBER=$((NODE_NUMBER+1))
done
exit
fi
echo "Waiting for manager to be running..."
aws ec2 wait instance-running --instance-ids $(cat ./cloud/aws/instance_id.out | tr '\n' ' ')
poll_instance_log ${MANAGER_INSTANCE_ID}
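Once the console log shows the manager has booted, the swarm can be inspected
from the build instance over the Docker API port opened by the security group
(the IP below is a placeholder):
```console
$ docker -H 172.31.2.176:2375 info
```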


@@ -0,0 +1,7 @@
DEFAULT linux
TIMEOUT 0
PROMPT 0
LABEL linux
KERNEL /vmlinuz64
INITRD /initrd.img
APPEND root=/dev/xvdb1 console=hvc0 console=tty0 console=tty1 console=ttyS0 mobyplatform=aws

alpine/cloud/build-common.sh Executable file

@@ -0,0 +1,123 @@
#!/bin/bash
set -e
MOBY_SRC_ROOT=${MOBY_SRC_ROOT:-/mnt}
arrowecho()
{
echo " --->" "$@"
}
errecho()
{
echo "$@" >&2
}
# $1 - the device to format (e.g. /dev/xvdb)
format_on_device()
{
while [ ! -e "$1" ]
do
sleep 0.1
done
arrowecho "Formatting boot partition"
# TODO (nathanleclaire): Any more readable or more elegant solution to
# account for this minor (specify 1st partition as bootable) difference
# between cloud builds?
if [ "${PROVIDER}" == "aws" ]
then
# This heredoc might be confusing at first glance, so here is a detailed
# summary of what each line does:
#
# n - create new partition
# p - make it a primary partition
# 1 - it should be partition #1
# \n - use default first cylinder
# \n - use default last cylinder
# a - toggle a partition as bootable
# 1 - first partition
# w - write changes and exit
fdisk "$1" << EOF
n
p
1
a
1
w
EOF
elif [ ${PROVIDER} == "azure" ]
then
fdisk "$1" << EOF
n
p
1
a
w
EOF
else
errecho "Provider not recognized: ${PROVIDER}"
exit 1
fi
# To ensure everything went smoothly, print the resulting partition table.
arrowecho "Printing device partition contents"
fdisk -l "$1"
}
# $1 - device
# $2 - partition 1 on device
configure_syslinux_on_device_partition()
{
# Mount created root partition, format it as ext4, and copy over the needed
# files for boot (syslinux configuration, kernel binary, and initrd.img)
while [ ! -e "$2" ]
do
sleep 0.1
done
arrowecho "Making filesystem on partition"
mke2fs -t ext4 "$2"
arrowecho "Mounting partition filesystem"
ROOT_PARTITION_MOUNT="${MOBY_SRC_ROOT}/moby"
if [ ! -d ${ROOT_PARTITION_MOUNT} ]
then
mkdir -p ${ROOT_PARTITION_MOUNT}
fi
mount -t ext4 "$2" ${ROOT_PARTITION_MOUNT}
arrowecho "Copying image and kernel binary to partition"
# Get files needed to boot in place.
cp ${MOBY_SRC_ROOT}/cloud/${PROVIDER}/syslinux.cfg ${ROOT_PARTITION_MOUNT}
cat ${ROOT_PARTITION_MOUNT}/syslinux.cfg
cp ${MOBY_SRC_ROOT}/kernel/vmlinuz64 ${ROOT_PARTITION_MOUNT}
cp ${MOBY_SRC_ROOT}/initrd.img ${ROOT_PARTITION_MOUNT}
# From http://www.syslinux.org/wiki/index.php?title=EXTLINUX:
#
# "Note that EXTLINUX installs in the filesystem partition like a
# well-behaved bootloader :). Thus, it needs a master boot record in the
# partition table; the mbr.bin shipped with SYSLINUX should work well."
# Thus, this step installs syslinux on the mounted filesystem (partition
# 1).
arrowecho "Installing syslinux to partition"
extlinux --install ${ROOT_PARTITION_MOUNT}
# Install the syslinux master boot record at the start of the target device.
arrowecho "Copying MBR to target device"
dd if=/usr/share/syslinux/mbr.bin of="$1" bs=440 count=1
umount ${ROOT_PARTITION_MOUNT}
arrowecho "Checking device/partition sanity"
fdisk -l "$1"
}