Merge pull request #10403 from justinsb/ebs-for-master-data-2

AWS: Use persistent disk on master
Robert Bailey 2015-06-26 21:52:10 -07:00
commit e6f14a21ce
5 changed files with 133 additions and 22 deletions

cluster/aws/config-default.sh · View File

@@ -37,6 +37,9 @@ IAM_PROFILE_MINION="kubernetes-minion"
 LOG="/dev/null"

+MASTER_DISK_TYPE="${MASTER_DISK_TYPE:-gp2}"
+MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20}
+
 MASTER_NAME="${INSTANCE_PREFIX}-master"
 MASTER_TAG="${INSTANCE_PREFIX}-master"
 MINION_TAG="${INSTANCE_PREFIX}-minion"

cluster/aws/config-test.sh · View File

@@ -33,6 +33,9 @@ IAM_PROFILE_MINION="kubernetes-minion"
 LOG="/dev/null"

+MASTER_DISK_TYPE="${MASTER_DISK_TYPE:-gp2}"
+MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20}
+
 MASTER_NAME="${INSTANCE_PREFIX}-master"
 MASTER_TAG="${INSTANCE_PREFIX}-master"
 MINION_TAG="${INSTANCE_PREFIX}-minion"
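Both config files use bash default expansion (`${VAR:-default}`), so the new settings can be overridden from the environment before bringing up a cluster. A minimal sketch (the values are illustrative; cluster/kube-up.sh is the usual entry point):

    export MASTER_DISK_SIZE=50   # GB; falls back to 20 when unset
    export MASTER_DISK_TYPE=gp2  # the default EBS volume type
    cluster/kube-up.sh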

cluster/aws/templates/format-disks.sh · View File

@@ -66,6 +66,7 @@ else
   # Remove any existing mounts
   for block_device in ${block_devices}; do
     echo "Unmounting ${block_device}"
+    /bin/umount ${block_device}
     sed -i -e "\|^${block_device}|d" /etc/fstab
   done
@@ -80,26 +81,28 @@ else
     echo "Found multiple ephemeral block devices, formatting with btrfs as RAID-0"
     mkfs.btrfs -f --data raid0 ${block_devices[@]}
   fi
-  echo "${block_devices[0]} /mnt btrfs noatime 0 0" >> /etc/fstab
-  mount /mnt
+  echo "${block_devices[0]} /mnt/ephemeral btrfs noatime 0 0" >> /etc/fstab
+  mkdir -p /mnt/ephemeral
+  mount /mnt/ephemeral
-  mkdir -p /mnt/kubernetes
+  mkdir -p /mnt/ephemeral/kubernetes
-  move_docker="/mnt"
-  move_kubelet="/mnt/kubernetes"
+  move_docker="/mnt/ephemeral"
+  move_kubelet="/mnt/ephemeral/kubernetes"
 elif [[ ${docker_storage} == "aufs-nolvm" ]]; then
   if [[ ${#block_devices[@]} != 1 ]]; then
     echo "aufs-nolvm selected, but multiple ephemeral devices were found; only the first will be available"
   fi
   mkfs -t ext4 ${block_devices[0]}
-  echo "${block_devices[0]} /mnt ext4 noatime 0 0" >> /etc/fstab
-  mount /mnt
+  echo "${block_devices[0]} /mnt/ephemeral ext4 noatime 0 0" >> /etc/fstab
+  mkdir -p /mnt/ephemeral
+  mount /mnt/ephemeral
-  mkdir -p /mnt/kubernetes
+  mkdir -p /mnt/ephemeral/kubernetes
-  move_docker="/mnt"
-  move_kubelet="/mnt/kubernetes"
+  move_docker="/mnt/ephemeral"
+  move_kubelet="/mnt/ephemeral/kubernetes"
 elif [[ ${docker_storage} == "devicemapper" || ${docker_storage} == "aufs" ]]; then
   # We always use LVM, even with one device
   # In devicemapper mode, Docker can use LVM directly
@@ -144,21 +147,21 @@ else
     fi
     mkfs -t ext4 /dev/vg-ephemeral/docker
-    mkdir -p /mnt/docker
-    echo "/dev/vg-ephemeral/docker /mnt/docker ext4 noatime 0 0" >> /etc/fstab
-    mount /mnt/docker
-    move_docker="/mnt"
+    mkdir -p /mnt/ephemeral/docker
+    echo "/dev/vg-ephemeral/docker /mnt/ephemeral/docker ext4 noatime 0 0" >> /etc/fstab
+    mount /mnt/ephemeral/docker
+    move_docker="/mnt/ephemeral"
   fi
   # Remaining 5% is for kubernetes data
   # TODO: Should this be a thin pool?  e.g. would we ever want to snapshot this data?
   lvcreate -l 100%FREE -n kubernetes vg-ephemeral
   mkfs -t ext4 /dev/vg-ephemeral/kubernetes
-  mkdir -p /mnt/kubernetes
-  echo "/dev/vg-ephemeral/kubernetes /mnt/kubernetes ext4 noatime 0 0" >> /etc/fstab
-  mount /mnt/kubernetes
+  mkdir -p /mnt/ephemeral/kubernetes
+  echo "/dev/vg-ephemeral/kubernetes /mnt/ephemeral/kubernetes ext4 noatime 0 0" >> /etc/fstab
+  mount /mnt/ephemeral/kubernetes
-  move_kubelet="/mnt/kubernetes"
+  move_kubelet="/mnt/ephemeral/kubernetes"
 else
   echo "Ignoring unknown DOCKER_STORAGE: ${docker_storage}"
 fi
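Every storage branch above now repeats the same idiom with /mnt/ephemeral as the root: make a filesystem, record it in /etc/fstab, create the mountpoint, mount it. A hypothetical helper showing that idiom in isolation (not part of this commit; the script inlines these steps each time):

    # Hypothetical helper illustrating the recurring pattern in format-disks.sh.
    # Usage: format_and_mount <device> <mountpoint> <fstype>
    format_and_mount() {
      local device="$1" mountpoint="$2" fstype="$3"
      mkfs -t "${fstype}" "${device}"                                    # destructive: formats the device
      echo "${device} ${mountpoint} ${fstype} noatime 0 0" >> /etc/fstab # persist across reboots
      mkdir -p "${mountpoint}"
      mount "${mountpoint}"                                              # fstab entry supplies the device
    }
    # e.g.: format_and_mount /dev/vg-ephemeral/kubernetes /mnt/ephemeral/kubernetes ext4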

cluster/aws/templates/setup-master-pd.sh · View File

@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Format and mount the disk, create directories on it for all of the master's
+# persistent data, and link them to where they're used.
+
+echo "Waiting for master pd to be attached"
+attempt=0
+while true; do
+  echo Attempt "$(($attempt+1))" to check for /dev/xvdb
+  if [[ -e /dev/xvdb ]]; then
+    echo "Found /dev/xvdb"
+    break
+  fi
+  attempt=$(($attempt+1))
+  sleep 1
+done
+
+# Mount Master Persistent Disk
+echo "Mounting master-pd"
+mkdir -p /mnt/master-pd
+mkfs -t ext4 /dev/xvdb
+echo "/dev/xvdb /mnt/master-pd ext4 noatime 0 0" >> /etc/fstab
+mount /mnt/master-pd
+
+# Contains all the data stored in etcd
+mkdir -m 700 -p /mnt/master-pd/var/etcd
+# Contains the dynamically generated apiserver auth certs and keys
+mkdir -p /mnt/master-pd/srv/kubernetes
+# Contains the cluster's initial config parameters and auth tokens
+mkdir -p /mnt/master-pd/srv/salt-overlay
+# Directory for kube-apiserver to store SSH key (if necessary)
+mkdir -p /mnt/master-pd/srv/sshproxy
+
+ln -s -f /mnt/master-pd/var/etcd /var/etcd
+ln -s -f /mnt/master-pd/srv/kubernetes /srv/kubernetes
+ln -s -f /mnt/master-pd/srv/sshproxy /srv/sshproxy
+ln -s -f /mnt/master-pd/srv/salt-overlay /srv/salt-overlay
+
+# This is a bit of a hack to get around the fact that salt has to run after the
+# PD and mounted directory are already set up. We can't give ownership of the
+# directory to etcd until the etcd user and group exist, but they don't exist
+# until salt runs if we don't create them here. We could alternatively make the
+# permissions on the directory more permissive, but this seems less bad.
+if ! id etcd &>/dev/null; then
+  useradd -s /sbin/nologin -d /var/etcd etcd
+fi
+chown -R etcd /mnt/master-pd/var/etcd
+chgrp -R etcd /mnt/master-pd/var/etcd
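The wait loop above retries once per second with no upper bound, relying on kube-up having issued attach-volume on the other side of the race. A bounded variant (hypothetical hardening, not part of this commit) would fail fast if the attachment never arrives:

    # Hypothetical: give up after 120 attempts (~2 minutes) instead of waiting forever.
    attempt=0
    while [[ ! -e /dev/xvdb ]]; do
      attempt=$((attempt+1))
      if (( attempt > 120 )); then
        echo "Timed out waiting for /dev/xvdb" >&2
        exit 1
      fi
      sleep 1
    done
    echo "Found /dev/xvdb"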

cluster/aws/util.sh · View File

@@ -27,6 +27,9 @@ ALLOCATE_NODE_CIDRS=true
 NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
 ASG_NAME="${NODE_INSTANCE_PREFIX}-group"

+# We could allow the master disk volume id to be specified in future
+MASTER_DISK_ID=
+
 case "${KUBE_OS_DISTRIBUTION}" in
   ubuntu|wheezy|coreos)
     source "${KUBE_ROOT}/cluster/aws/${KUBE_OS_DISTRIBUTION}/util.sh"
@@ -55,7 +58,7 @@ MINION_SG_NAME="kubernetes-minion-${CLUSTER_ID}"
 # Be sure to map all the ephemeral drives. We can specify more than we actually have.
 # TODO: Actually mount the correct number (especially if we have more), though this is non-trivial, and
 # only affects the big storage instance types, which aren't a typical use case right now.
-BLOCK_DEVICE_MAPPINGS="[{\"DeviceName\": \"/dev/sdb\",\"VirtualName\":\"ephemeral0\"},{\"DeviceName\": \"/dev/sdc\",\"VirtualName\":\"ephemeral1\"},{\"DeviceName\": \"/dev/sdd\",\"VirtualName\":\"ephemeral2\"},{\"DeviceName\": \"/dev/sde\",\"VirtualName\":\"ephemeral3\"}]"
+BLOCK_DEVICE_MAPPINGS="[{\"DeviceName\": \"/dev/sdc\",\"VirtualName\":\"ephemeral0\"},{\"DeviceName\": \"/dev/sdd\",\"VirtualName\":\"ephemeral1\"},{\"DeviceName\": \"/dev/sde\",\"VirtualName\":\"ephemeral2\"},{\"DeviceName\": \"/dev/sdf\",\"VirtualName\":\"ephemeral3\"}]"

 function json_val {
   python -c 'import json,sys;obj=json.load(sys.stdin);print obj'$1''
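The ephemeral mappings shift from /dev/sdb through /dev/sde to /dev/sdc through /dev/sdf because /dev/sdb is now reserved for the master's persistent data volume, which kube-up attaches later in this file. To eyeball the escaped JSON, one can pretty-print it (python is already a dependency here, as json_val shows):

    echo "${BLOCK_DEVICE_MAPPINGS}" | python -m json.tool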
@@ -361,6 +364,34 @@ function authorize-security-group-ingress {
   fi
 }

+# Gets master persistent volume, if exists
+# Sets MASTER_DISK_ID
+function find-master-pd {
+  local name=${MASTER_NAME}-pd
+  if [[ -z "${MASTER_DISK_ID}" ]]; then
+    MASTER_DISK_ID=`$AWS_CMD --output text describe-volumes \
+                             --filters Name=availability-zone,Values=${ZONE} \
+                                       Name=tag:Name,Values=${name} \
+                                       Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
+                             --query Volumes[].VolumeId`
+  fi
+}
+
+# Gets or creates master persistent volume
+# Sets MASTER_DISK_ID
+function ensure-master-pd {
+  local name=${MASTER_NAME}-pd
+
+  find-master-pd
+
+  if [[ -z "${MASTER_DISK_ID}" ]]; then
+    echo "Creating master disk: size ${MASTER_DISK_SIZE}GB, type ${MASTER_DISK_TYPE}"
+    MASTER_DISK_ID=`$AWS_CMD create-volume --availability-zone ${ZONE} --volume-type ${MASTER_DISK_TYPE} --size ${MASTER_DISK_SIZE} --query VolumeId --output text`
+    add-tag ${MASTER_DISK_ID} Name ${name}
+    add-tag ${MASTER_DISK_ID} KubernetesCluster ${CLUSTER_ID}
+  fi
+}
+
 # Verify prereqs
 function verify-prereqs {
   if [[ "$(which aws)" == "" ]]; then
@@ -724,6 +755,9 @@ function kube-up {
   # HTTPS to the master is allowed (for API access)
   authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 443 --cidr 0.0.0.0/0"

+  # Get or create master persistent volume
+  ensure-master-pd
+
   (
     # We pipe this to the ami as a startup script in the user-data field. Requires a compatible ami
     echo "#! /bin/bash"
@@ -756,6 +790,7 @@ function kube-up {
     echo "readonly DOCKER_STORAGE='${DOCKER_STORAGE:-}'"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/common.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/format-disks.sh"
+    grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/setup-master-pd.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/create-dynamic-salt-files.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/download-release.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/salt-master.sh"
@@ -778,10 +813,9 @@ function kube-up {
   add-tag $master_id KubernetesCluster ${CLUSTER_ID}

   echo "Waiting for master to be ready"

   local attempt=0
   while true; do
     echo -n Attempt "$(($attempt+1))" to check for master node
     local ip=$(get_instance_public_ip ${master_id})
     if [[ -z "${ip}" ]]; then
@@ -797,8 +831,14 @@ function kube-up {
   KUBE_MASTER_IP=$(assign-elastic-ip $ip $master_id)
   echo -e " ${color_green}[master running @${KUBE_MASTER_IP}]${color_norm}"

-  # We are not able to add a route to the instance until that instance is in "running" state.
+  # We are not able to add a route or volume to the instance until that instance is in "running" state.
   wait-for-instance-running $master_id

+  # There is a race between instance start and volume attachment; there appears to be no way
+  # to start an AWS instance with a volume already attached.
+  # To work around this, we wait for the volume to be ready in setup-master-pd.sh
+  echo "Attaching persistent data volume (${MASTER_DISK_ID}) to master"
+  $AWS_CMD attach-volume --volume-id ${MASTER_DISK_ID} --device /dev/sdb --instance-id ${master_id}
+
+  sleep 10
   $AWS_CMD create-route --route-table-id $ROUTE_TABLE_ID --destination-cidr-block ${MASTER_IP_RANGE} --instance-id $master_id > $LOG
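The sleep 10 after attach-volume is a fixed guess at attach latency; a more deterministic variant (hypothetical, not part of this commit) would poll the volume's attachment state before creating the route:

    # Hypothetical: wait until the volume reports "attached" instead of sleeping a fixed 10s.
    # Assumes $AWS_CMD and ${MASTER_DISK_ID} as defined above.
    for i in $(seq 1 30); do
      state=$($AWS_CMD --output text describe-volumes \
                --volume-ids ${MASTER_DISK_ID} \
                --query 'Volumes[0].Attachments[0].State')
      if [[ "${state}" == "attached" ]]; then
        break
      fi
      sleep 2
    done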