diff --git a/cluster/gce/gci/master-helper.sh b/cluster/gce/gci/master-helper.sh index 5742e6cc488..26319c495f3 100755 --- a/cluster/gce/gci/master-helper.sh +++ b/cluster/gce/gci/master-helper.sh @@ -33,10 +33,12 @@ source "${KUBE_ROOT}/cluster/gce/gci/helper.sh" function create-master-instance { local address="" [[ -n ${1:-} ]] && address="${1}" + local internal_address="" + [[ -n ${2:-} ]] && internal_address="${2}" write-master-env ensure-gci-metadata-files - create-master-instance-internal "${MASTER_NAME}" "${address}" + create-master-instance-internal "${MASTER_NAME}" "${address}" "${internal_address}" } function replicate-master-instance() { @@ -89,6 +91,31 @@ function replicate-master-instance() { } +# run-gcloud-command runs a given command over ssh with retries. +function run-gcloud-command() { + local master_name="${1}" + local zone="${2}" + local command="${3}" + + local retries=5 + local sleep_sec=10 + + local result="" + + for attempt in $(seq 1 ${retries}); do + if result=$(gcloud compute ssh "${master_name}" --project "${PROJECT}" --zone "${zone}" --command "${command}" -- -oConnectTimeout=60 2>&1); then + echo "Successfully executed '${command}' on ${master_name}" + return 0 + fi + + sleep "${sleep_sec}" + done + echo "Failed to execute '${command}' on ${master_name} despite ${retries} attempts" >&2 + echo "Last attempt failed with: ${result}" >&2 + return 1 +} + + function create-master-instance-internal() { local gcloud="gcloud" local retries=5 @@ -101,6 +128,7 @@ function create-master-instance-internal() { local -r master_name="${1}" local -r address="${2:-}" + local -r internal_address="${3:-}" local preemptible_master="" if [[ "${PREEMPTIBLE_MASTER:-}" == "true" ]]; then @@ -153,6 +181,10 @@ function create-master-instance-internal() { ${preemptible_master} \ ${network} 2>&1); then echo "${result}" >&2 + + if [[ -n "${internal_address:-}" ]]; then + attach-internal-master-ip "${master_name}" "${ZONE}" "${internal_address}" + fi return 0 else echo "${result}" >&2 diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 414f8630609..ca3ac4031e7 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -470,7 +470,17 @@ function detect-master() { exit 1 fi fi - echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP)" >&2 + if [[ -z "${KUBE_MASTER_INTERNAL_IP-}" ]] && [[ ${GCE_PRIVATE_CLUSTER:-} == "true" ]]; then + local master_address_name="${MASTER_NAME}-internal-ip" + echo "Looking for address '${master_address_name}'" >&2 + if ! KUBE_MASTER_INTERNAL_IP=$(gcloud compute addresses describe "${master_address_name}" \ + --project "${PROJECT}" --region "${REGION}" -q --format='value(address)') || \ + [[ -z "${KUBE_MASTER_INTERNAL_IP-}" ]]; then + echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" >&2 + exit 1 + fi + fi + echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP; internal IP: ${KUBE_MASTER_INTERNAL_IP:-(not set)})" >&2 } function load-or-gen-kube-bearertoken() { @@ -1161,6 +1171,7 @@ E2E_STORAGE_TEST_ENVIRONMENT: $(yaml-quote ${E2E_STORAGE_TEST_ENVIRONMENT:-}) KUBE_DOCKER_REGISTRY: $(yaml-quote ${KUBE_DOCKER_REGISTRY:-}) KUBE_ADDON_REGISTRY: $(yaml-quote ${KUBE_ADDON_REGISTRY:-}) MULTIZONE: $(yaml-quote ${MULTIZONE:-}) +MULTIMASTER: $(yaml-quote ${MULTIMASTER:-}) NON_MASQUERADE_CIDR: $(yaml-quote ${NON_MASQUERADE_CIDR:-}) ENABLE_DEFAULT_STORAGE_CLASS: $(yaml-quote ${ENABLE_DEFAULT_STORAGE_CLASS:-}) ENABLE_APISERVER_ADVANCED_AUDIT: $(yaml-quote ${ENABLE_APISERVER_ADVANCED_AUDIT:-}) @@ -2259,10 +2270,14 @@ function kube-up() { create-windows-nodes create-linux-nodes elif [[ ${KUBE_REPLICATE_EXISTING_MASTER:-} == "true" ]]; then + detect-master if [[ "${MASTER_OS_DISTRIBUTION}" != "gci" && "${MASTER_OS_DISTRIBUTION}" != "ubuntu" ]]; then echo "Master replication supported only for gci and ubuntu" return 1 fi + if [[ ${GCE_PRIVATE_CLUSTER:-} == "true" ]]; then + create-internal-loadbalancer + fi create-loadbalancer # If replication of master fails, we need to ensure that the replica is removed from etcd clusters. if ! replicate-master; then @@ -2696,17 +2711,27 @@ function create-master() { KUBERNETES_MASTER_NAME="${MASTER_RESERVED_IP}" MASTER_ADVERTISE_ADDRESS="${MASTER_RESERVED_IP}" - create-certs "${MASTER_RESERVED_IP}" + MASTER_INTERNAL_IP="" + if [[ ${GCE_PRIVATE_CLUSTER:-} == "true" ]]; then + gcloud compute addresses create "${MASTER_NAME}-internal-ip" --project "${PROJECT}" --region $REGION --subnet $SUBNETWORK + MASTER_INTERNAL_IP=$(gcloud compute addresses describe "${MASTER_NAME}-internal-ip" --project "${PROJECT}" --region "${REGION}" -q --format='value(address)') + echo "Master internal ip is: $MASTER_INTERNAL_IP" + KUBERNETES_MASTER_NAME="${MASTER_INTERNAL_IP}" + MASTER_ADVERTISE_ADDRESS="${MASTER_INTERNAL_IP}" + fi + + create-certs "${MASTER_RESERVED_IP}" "${MASTER_INTERNAL_IP}" create-etcd-certs ${MASTER_NAME} create-etcd-apiserver-certs "etcd-${MASTER_NAME}" ${MASTER_NAME} if [[ "$(get-num-nodes)" -ge "50" ]]; then # We block on master creation for large clusters to avoid doing too much # unnecessary work in case master start-up fails (like creation of nodes). - create-master-instance "${MASTER_RESERVED_IP}" + create-master-instance "${MASTER_RESERVED_IP}" "${MASTER_INTERNAL_IP}" else - create-master-instance "${MASTER_RESERVED_IP}" & + create-master-instance "${MASTER_RESERVED_IP}" "${MASTER_INTERNAL_IP}" & fi + } # Adds master replica to etcd cluster. @@ -2720,6 +2745,8 @@ function create-master() { # $1: etcd client port # $2: etcd internal port # returns the result of ssh command which adds replica +#### WARNING: THIS DOESN'T WORK IN CLUSTERS WITH MTLS ENABLED. +# TODO(mborsz): Fix this function add-replica-to-etcd() { local -r client_port="${1}" local -r internal_port="${2}" @@ -2778,6 +2805,10 @@ function replicate-master() { --project "${PROJECT}" \ --zone "${ZONE}" \ --instances "${REPLICA_NAME}" + + if [[ "${GCE_PRIVATE_CLUSTER:-}" == "true" ]]; then + add-to-internal-loadbalancer "${REPLICA_NAME}" "${ZONE}" + fi } # Detaches old and ataches new external IP to a VM. @@ -2817,8 +2848,6 @@ function attach-external-ip() { # ZONE # REGION function create-loadbalancer() { - detect-master - # Step 0: Return early if LB is already configured. if gcloud compute forwarding-rules describe ${MASTER_NAME} \ --project "${PROJECT}" --region ${REGION} > /dev/null 2>&1; then @@ -2860,6 +2889,140 @@ function create-loadbalancer() { echo "DONE" } + +# attach-internal-master-ip attach internal ip to existing master. +# +# Assumes: +# * PROJECT +function attach-internal-master-ip() { + local name="${1}" + local zone="${2}" + local ip="${3}" + + local aliases=$(gcloud compute instances describe "${name}" --project "${PROJECT}" --zone "${zone}" --flatten='networkInterfaces[0].aliasIpRanges[]' --format='value[separator=':'](networkInterfaces[0].aliasIpRanges.subnetworkRangeName,networkInterfaces[0].aliasIpRanges.ipCidrRange)' | sed 's/^://' | paste -s -d';' -) + aliases="${aliases:+${aliases};}${ip}/32" + echo "Setting ${name}'s aliases to '${aliases}' (added ${ip})" + # Attach ${ip} to ${name} + gcloud compute instances network-interfaces update "${name}" --project "${PROJECT}" --zone "${zone}" --aliases="${aliases}" + run-gcloud-command "${name}" "${zone}" "sudo ip route add to local ${ip}/32 dev eth0" + return $? +} + + +# detach-internal-master-ip detaches internal ip from existing master. +# +# Assumes: +# * PROJECT +function detach-internal-master-ip() { + local name="${1}" + local zone="${2}" + local ip="${3}" + + local aliases=$(gcloud compute instances describe "${name}" --project "${PROJECT}" --zone "${zone}" --flatten='networkInterfaces[0].aliasIpRanges[]' --format='value[separator=':'](networkInterfaces[0].aliasIpRanges.subnetworkRangeName,networkInterfaces[0].aliasIpRanges.ipCidrRange)' | sed 's/^://' | grep -v "${ip}" | paste -s -d';' -) + echo "Setting ${name}'s aliases to '${aliases}' (removed ${ip})" + # Detach ${MASTER_NAME}-internal-ip from ${name} + gcloud compute instances network-interfaces update "${name}" --project "${PROJECT}" --zone "${zone}" --aliases="${aliases}" + run-gcloud-command "${name}" "${zone}" "sudo ip route del to local ${ip}/32 dev eth0" + return $? +} + +# create-internal-loadbalancer creates an internal load balacer in front of existing master. +# +# Assumes: +# * MASTER_NAME +# * PROJECT +# * REGION +function create-internal-loadbalancer() { + if gcloud compute forwarding-rules describe "${MASTER_NAME}-internal" \ + --project "${PROJECT}" --region ${REGION} > /dev/null 2>&1; then + echo "Load balancer already exists" + return + fi + + local EXISTING_MASTER_NAME="$(get-all-replica-names)" + local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${EXISTING_MASTER_NAME}" \ + --project "${PROJECT}" --format="value(zone)") + + echo "Detaching ${KUBE_MASTER_INTERNAL_IP} from ${EXISTING_MASTER_NAME}/${EXISTING_MASTER_ZONE}" + detach-internal-master-ip "${EXISTING_MASTER_NAME}" "${EXISTING_MASTER_ZONE}" "${KUBE_MASTER_INTERNAL_IP}" + + echo "Creating internal load balancer with IP: ${KUBE_MASTER_INTERNAL_IP}" + gcloud compute health-checks --project "${PROJECT}" create tcp "${MASTER_NAME}-hc" --port=443 + + gcloud compute backend-services create "${MASTER_NAME}" \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --protocol tcp \ + --region "${REGION}" \ + --load-balancing-scheme internal \ + --health-checks "${MASTER_NAME}-hc" + + gcloud compute forwarding-rules create "${MASTER_NAME}-internal" \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --load-balancing-scheme internal \ + --network "${NETWORK}" \ + --subnet "${SUBNETWORK}" \ + --address "${KUBE_MASTER_INTERNAL_IP}" \ + --ip-protocol TCP \ + --ports 443 \ + --backend-service "${MASTER_NAME}" \ + --backend-service-region "${REGION}" + + echo "Adding ${EXISTING_MASTER_NAME}/${EXISTING_MASTER_ZONE} to the load balancer" + add-to-internal-loadbalancer "${EXISTING_MASTER_NAME}" "${EXISTING_MASTER_ZONE}" +} + +# add-to-internal-loadbalancer adds an instance to ILB. +# Assumes: +# * MASTER_NAME +# * PROJECT +# * REGION +function add-to-internal-loadbalancer() { + local name="${1}" + local zone="${2}" + + gcloud compute instance-groups unmanaged create "${name}" --project "${PROJECT}" --zone "${zone}" + gcloud compute instance-groups unmanaged add-instances "${name}" --project "${PROJECT}" --zone "${zone}" --instances "${name}" + gcloud compute backend-services add-backend "${MASTER_NAME}" \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --instance-group "${name}" \ + --instance-group-zone "${zone}" +} + +# remove-from-internal-loadbalancer removes an instance from ILB. +# Assumes: +# * MASTER_NAME +# * PROJECT +# * REGION +function remove-from-internal-loadbalancer() { + local name="${1}" + local zone="${2}" + + if gcloud compute instance-groups unmanaged describe "${name}" --project "${PROJECT}" --zone "${zone}" &>/dev/null; then + gcloud compute backend-services remove-backend "${MASTER_NAME}" \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --instance-group "${name}" \ + --instance-group-zone "${zone}" + gcloud compute instance-groups unmanaged delete "${name}" --project "${PROJECT}" --zone "${zone}" --quiet + fi +} + +function delete-internal-loadbalancer() { + if gcloud compute forwarding-rules describe "${MASTER_NAME}-internal" --project "${PROJECT}" --region "${REGION}" &>/dev/null; then + gcloud compute forwarding-rules delete "${MASTER_NAME}-internal" --project "${PROJECT}" --region "${REGION}" --quiet + fi + + if gcloud compute backend-services describe "${MASTER_NAME}" --project "${PROJECT}" --region "${REGION}" &>/dev/null; then + gcloud compute backend-services delete "${MASTER_NAME}" --project "${PROJECT}" --region "${REGION}" --quiet + fi + if gcloud compute health-checks describe "${MASTER_NAME}-gc" --project "${PROJECT}" &>/dev/null; then + gcloud compute health-checks delete "${MASTER_NAME}-gc" --project "${PROJECT}" --quiet + fi +} + function create-nodes-firewall() { # Create a single firewall rule for all minions. create-firewall-rule "${NODE_TAG}-all" "${CLUSTER_IP_RANGE}" "${NODE_TAG}" & @@ -3203,6 +3366,8 @@ function check-cluster() { # # $1: etcd client port # returns the result of ssh command which removes replica +#### WARNING: THIS DOESN'T WORK IN CLUSTERS WITH MTLS ENABLED. +# TODO(mborsz): Fix this function remove-replica-from-etcd() { local -r port="${1}" [[ -n "${EXISTING_MASTER_NAME}" ]] || return @@ -3298,6 +3463,10 @@ function kube-down() { --zone "${ZONE}" \ --instances "${REPLICA_NAME}" fi + # Detach replica from LB if needed. + if [[ ${GCE_PRIVATE_CLUSTER:-} == "true" ]]; then + remove-from-internal-loadbalancer "${REPLICA_NAME}" "${ZONE}" + fi # Now we can safely delete the VM. gcloud compute instances delete \ --project "${PROJECT}" \ @@ -3343,6 +3512,12 @@ function kube-down() { --quiet \ "${MASTER_NAME}" fi + + if [[ ${GCE_PRIVATE_CLUSTER:-} == "true" ]]; then + remove-from-internal-loadbalancer "${REMAINING_REPLICA_NAME}" "${REMAINING_REPLICA_ZONE}" + delete-internal-loadbalancer + attach-internal-master-ip "${REMAINING_REPLICA_NAME}" "${REMAINING_REPLICA_ZONE}" "${KUBE_MASTER_INTERNAL_IP}" + fi fi # If there are no more remaining master replicas, we should delete all remaining network resources. @@ -3357,6 +3532,14 @@ function kube-down() { --quiet \ "${MASTER_NAME}-ip" fi + + if gcloud compute addresses describe "${MASTER_NAME}-internal-ip" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then + gcloud compute addresses delete \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --quiet \ + "${MASTER_NAME}-internal-ip" + fi fi if [[ "${KUBE_DELETE_NODES:-}" != "false" ]]; then