diff --git a/cluster/aws/config-default.sh b/cluster/aws/config-default.sh index 5e1d08e3701..e57947fadbb 100644 --- a/cluster/aws/config-default.sh +++ b/cluster/aws/config-default.sh @@ -37,6 +37,9 @@ IAM_PROFILE_MINION="kubernetes-minion" LOG="/dev/null" +MASTER_DISK_TYPE="${MASTER_DISK_TYPE:-gp2}" +MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20} + MASTER_NAME="${INSTANCE_PREFIX}-master" MASTER_TAG="${INSTANCE_PREFIX}-master" MINION_TAG="${INSTANCE_PREFIX}-minion" diff --git a/cluster/aws/config-test.sh b/cluster/aws/config-test.sh index 2ca00e721a3..b4179f38009 100755 --- a/cluster/aws/config-test.sh +++ b/cluster/aws/config-test.sh @@ -33,6 +33,9 @@ IAM_PROFILE_MINION="kubernetes-minion" LOG="/dev/null" +MASTER_DISK_TYPE="${MASTER_DISK_TYPE:-gp2}" +MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20} + MASTER_NAME="${INSTANCE_PREFIX}-master" MASTER_TAG="${INSTANCE_PREFIX}-master" MINION_TAG="${INSTANCE_PREFIX}-minion" diff --git a/cluster/aws/templates/format-disks.sh b/cluster/aws/templates/format-disks.sh index b1a99dc9094..5efb17c4f32 100644 --- a/cluster/aws/templates/format-disks.sh +++ b/cluster/aws/templates/format-disks.sh @@ -66,6 +66,7 @@ else # Remove any existing mounts for block_device in ${block_devices}; do + echo "Unmounting ${block_device}" /bin/umount ${block_device} sed -i -e "\|^${block_device}|d" /etc/fstab done @@ -80,26 +81,28 @@ else echo "Found multiple ephemeral block devices, formatting with btrfs as RAID-0" mkfs.btrfs -f --data raid0 ${block_devices[@]} fi - echo "${block_devices[0]} /mnt btrfs noatime 0 0" >> /etc/fstab - mount /mnt + echo "${block_devices[0]} /mnt/ephemeral btrfs noatime 0 0" >> /etc/fstab + mkdir -p /mnt/ephemeral + mount /mnt/ephemeral - mkdir -p /mnt/kubernetes + mkdir -p /mnt/ephemeral/kubernetes - move_docker="/mnt" - move_kubelet="/mnt/kubernetes" + move_docker="/mnt/ephemeral" + move_kubelet="/mnt/ephemeral/kubernetes" elif [[ ${docker_storage} == "aufs-nolvm" ]]; then if [[ ${#block_devices[@]} != 1 ]]; then echo "aufs-nolvm 
selected, but multiple ephemeral devices were found; only the first will be available" fi mkfs -t ext4 ${block_devices[0]} - echo "${block_devices[0]} /mnt ext4 noatime 0 0" >> /etc/fstab - mount /mnt + echo "${block_devices[0]} /mnt/ephemeral ext4 noatime 0 0" >> /etc/fstab + mkdir -p /mnt/ephemeral + mount /mnt/ephemeral - mkdir -p /mnt/kubernetes + mkdir -p /mnt/ephemeral/kubernetes - move_docker="/mnt" - move_kubelet="/mnt/kubernetes" + move_docker="/mnt/ephemeral" + move_kubelet="/mnt/ephemeral/kubernetes" elif [[ ${docker_storage} == "devicemapper" || ${docker_storage} == "aufs" ]]; then # We always use LVM, even with one device # In devicemapper mode, Docker can use LVM directly @@ -144,21 +147,21 @@ else fi mkfs -t ext4 /dev/vg-ephemeral/docker - mkdir -p /mnt/docker - echo "/dev/vg-ephemeral/docker /mnt/docker ext4 noatime 0 0" >> /etc/fstab - mount /mnt/docker - move_docker="/mnt" + mkdir -p /mnt/ephemeral/docker + echo "/dev/vg-ephemeral/docker /mnt/ephemeral/docker ext4 noatime 0 0" >> /etc/fstab + mount /mnt/ephemeral/docker + move_docker="/mnt/ephemeral" fi # Remaining 5% is for kubernetes data # TODO: Should this be a thin pool? e.g. would we ever want to snapshot this data? 
lvcreate -l 100%FREE -n kubernetes vg-ephemeral mkfs -t ext4 /dev/vg-ephemeral/kubernetes - mkdir -p /mnt/kubernetes - echo "/dev/vg-ephemeral/kubernetes /mnt/kubernetes ext4 noatime 0 0" >> /etc/fstab - mount /mnt/kubernetes + mkdir -p /mnt/ephemeral/kubernetes + echo "/dev/vg-ephemeral/kubernetes /mnt/ephemeral/kubernetes ext4 noatime 0 0" >> /etc/fstab + mount /mnt/ephemeral/kubernetes - move_kubelet="/mnt/kubernetes" + move_kubelet="/mnt/ephemeral/kubernetes" else echo "Ignoring unknown DOCKER_STORAGE: ${docker_storage}" fi
diff --git a/cluster/aws/templates/setup-master-pd.sh b/cluster/aws/templates/setup-master-pd.sh
new file mode 100644
index 00000000000..0f509b51864
--- /dev/null
+++ b/cluster/aws/templates/setup-master-pd.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Format (only when it has no filesystem yet) and mount the disk, create
+# directories on it for all of the master's persistent data, and link them to
+# where they're used.
+
+# kube-up attaches the volume (as /dev/sdb) only after the instance is running,
+# so poll until the device shows up; it is expected to appear as /dev/xvdb.
+echo "Waiting for master pd to be attached"
+attempt=0
+while true; do
+  echo Attempt "$((attempt + 1))" to check for /dev/xvdb
+  if [[ -e /dev/xvdb ]]; then
+    echo "Found /dev/xvdb"
+    break
+  fi
+  attempt=$((attempt + 1))
+  sleep 1
+done
+
+# Mount Master Persistent Disk.
+# kube-up looks the volume up by tag and re-uses it when it already exists, so
+# only make a filesystem if none is present: mkfs would destroy the saved data.
+echo "Mounting master-pd"
+mkdir -p /mnt/master-pd
+if ! tune2fs -l /dev/xvdb &>/dev/null; then
+  echo "Formatting /dev/xvdb"
+  mkfs -t ext4 /dev/xvdb
+fi
+echo "/dev/xvdb /mnt/master-pd ext4 noatime 0 0" >> /etc/fstab
+mount /mnt/master-pd
+
+# Contains all the data stored in etcd
+mkdir -m 700 -p /mnt/master-pd/var/etcd
+# Contains the dynamically generated apiserver auth certs and keys
+mkdir -p /mnt/master-pd/srv/kubernetes
+# Contains the cluster's initial config parameters and auth tokens
+mkdir -p /mnt/master-pd/srv/salt-overlay
+# Directory for kube-apiserver to store SSH key (if necessary)
+mkdir -p /mnt/master-pd/srv/sshproxy
+
+ln -s -f /mnt/master-pd/var/etcd /var/etcd
+ln -s -f /mnt/master-pd/srv/kubernetes /srv/kubernetes
+ln -s -f /mnt/master-pd/srv/sshproxy /srv/sshproxy
+ln -s -f /mnt/master-pd/srv/salt-overlay /srv/salt-overlay
+
+# This is a bit of a hack to get around the fact that salt has to run after the
+# PD and mounted directory are already set up. We can't give ownership of the
+# directory to etcd until the etcd user and group exist, but they don't exist
+# until salt runs if we don't create them here. We could alternatively make the
+# permissions on the directory more permissive, but this seems less bad.
+if ! id etcd &>/dev/null; then
+  useradd -s /sbin/nologin -d /var/etcd etcd
+fi
+chown -R etcd /mnt/master-pd/var/etcd
+chgrp -R etcd /mnt/master-pd/var/etcd
diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh
index 614c13faef8..aa705c403a4 100644
--- a/cluster/aws/util.sh
+++ b/cluster/aws/util.sh
@@ -27,6 +27,9 @@ ALLOCATE_NODE_CIDRS=true
 NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
 ASG_NAME="${NODE_INSTANCE_PREFIX}-group"
 
+# We could allow the master disk volume id to be specified in future
+MASTER_DISK_ID=
+
 case "${KUBE_OS_DISTRIBUTION}" in
   ubuntu|wheezy|coreos)
     source "${KUBE_ROOT}/cluster/aws/${KUBE_OS_DISTRIBUTION}/util.sh"
@@ -55,7 +58,7 @@ MINION_SG_NAME="kubernetes-minion-${CLUSTER_ID}"
 # Be sure to map all the ephemeral drives. We can specify more than we actually have.
 # TODO: Actually mount the correct number (especially if we have more), though this is non-trivial, and
 # only affects the big storage instance types, which aren't a typical use case right now.
-BLOCK_DEVICE_MAPPINGS="[{\"DeviceName\": \"/dev/sdb\",\"VirtualName\":\"ephemeral0\"},{\"DeviceName\": \"/dev/sdc\",\"VirtualName\":\"ephemeral1\"},{\"DeviceName\": \"/dev/sdd\",\"VirtualName\":\"ephemeral2\"},{\"DeviceName\": \"/dev/sde\",\"VirtualName\":\"ephemeral3\"}]"
+BLOCK_DEVICE_MAPPINGS="[{\"DeviceName\": \"/dev/sdc\",\"VirtualName\":\"ephemeral0\"},{\"DeviceName\": \"/dev/sdd\",\"VirtualName\":\"ephemeral1\"},{\"DeviceName\": \"/dev/sde\",\"VirtualName\":\"ephemeral2\"},{\"DeviceName\": \"/dev/sdf\",\"VirtualName\":\"ephemeral3\"}]"
 
 function json_val {
     python -c 'import json,sys;obj=json.load(sys.stdin);print obj'$1''
@@ -361,6 +364,42 @@ function authorize-security-group-ingress {
   fi
 }
 
+# Finds the master's persistent volume, if it exists, by its Name and
+# KubernetesCluster tags within ${ZONE}.
+# Reads AWS_CMD, ZONE, MASTER_NAME and CLUSTER_ID.
+# Sets MASTER_DISK_ID (left empty when no matching volume is found; an already
+# set MASTER_DISK_ID is kept as-is and no lookup is made).
+function find-master-pd {
+  local name=${MASTER_NAME}-pd
+  if [[ -z "${MASTER_DISK_ID}" ]]; then
+    # The query and filters are quoted so the shell can neither glob-expand the
+    # brackets nor word-split the values. $AWS_CMD stays unquoted on purpose:
+    # it presumably holds the aws command plus arguments (defined elsewhere).
+    MASTER_DISK_ID=$($AWS_CMD --output text describe-volumes \
+                             --filters "Name=availability-zone,Values=${ZONE}" \
+                                       "Name=tag:Name,Values=${name}" \
+                                       "Name=tag:KubernetesCluster,Values=${CLUSTER_ID}" \
+                             --query 'Volumes[].VolumeId')
+  fi
+}
+
+# Gets or creates the master's persistent volume.
+# A new volume is created in ${ZONE} with MASTER_DISK_SIZE / MASTER_DISK_TYPE
+# and tagged with the Name and KubernetesCluster tags find-master-pd filters on.
+# Sets MASTER_DISK_ID
+function ensure-master-pd {
+  local name=${MASTER_NAME}-pd
+
+  find-master-pd
+
+  if [[ -z "${MASTER_DISK_ID}" ]]; then
+    echo "Creating master disk: size ${MASTER_DISK_SIZE}GB, type ${MASTER_DISK_TYPE}"
+    MASTER_DISK_ID=$($AWS_CMD create-volume --availability-zone "${ZONE}" --volume-type "${MASTER_DISK_TYPE}" --size "${MASTER_DISK_SIZE}" --query VolumeId --output text)
+    add-tag "${MASTER_DISK_ID}" Name "${name}"
+    add-tag "${MASTER_DISK_ID}" KubernetesCluster "${CLUSTER_ID}"
+  fi
+}
+
 # Verify prereqs
 function verify-prereqs {
   if [[ "$(which aws)" == "" ]]; then
@@ -724,6 +763,9 @@ function kube-up {
   # HTTPS to the master is allowed (for API access)
   authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 443 --cidr 0.0.0.0/0"
 
+  # Get or create master persistent volume
+  ensure-master-pd
+
   (
     # We pipe this to the ami as a startup script in the user-data field. Requires a compatible ami
     echo "#! /bin/bash"
@@ -756,6 +798,7 @@ function kube-up {
     echo "readonly DOCKER_STORAGE='${DOCKER_STORAGE:-}'"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/common.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/format-disks.sh"
+    grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/setup-master-pd.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/create-dynamic-salt-files.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/download-release.sh"
     grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/salt-master.sh"
@@ -778,10 +821,9 @@ function kube-up {
   add-tag $master_id KubernetesCluster ${CLUSTER_ID}
 
   echo "Waiting for master to be ready"
-
   local attempt=0
 
-   while true; do
+  while true; do
     echo -n Attempt "$(($attempt+1))" to check for master node
     local ip=$(get_instance_public_ip ${master_id})
     if [[ -z "${ip}" ]]; then
@@ -797,7 +839,13 @@ function kube-up {
       KUBE_MASTER_IP=$(assign-elastic-ip $ip $master_id)
       echo -e " ${color_green}[master running @${KUBE_MASTER_IP}]${color_norm}"
 
-      # We are not able to add a route to the instance until that instance is in "running" state.
+      # We are not able to add a route or volume to the instance until that instance is in "running" state.
       wait-for-instance-running $master_id
+
+      # This is a race between instance start and volume attachment. There appears to be no way to start an AWS instance with a volume attached.
+      # To work around this, we wait for volume to be ready in setup-master-pd.sh
+      echo "Attaching persistent data volume (${MASTER_DISK_ID}) to master"
+      $AWS_CMD attach-volume --volume-id "${MASTER_DISK_ID}" --device /dev/sdb --instance-id "${master_id}" > $LOG
+
       sleep 10
       $AWS_CMD create-route --route-table-id $ROUTE_TABLE_ID --destination-cidr-block ${MASTER_IP_RANGE} --instance-id $master_id > $LOG