diff --git a/cluster/addons/addon-manager/CHANGELOG.md b/cluster/addons/addon-manager/CHANGELOG.md index 4b9a866b068..858c9e0c5c7 100644 --- a/cluster/addons/addon-manager/CHANGELOG.md +++ b/cluster/addons/addon-manager/CHANGELOG.md @@ -1,7 +1,10 @@ -## Version 9.1.1 (Wed May 19 2020 Antoni Zawodny ) +### Version 9.1.2 (Thu August 6 2020 Spencer Peterson ) + - Fix `start_addon` overwriting resources with `addonmanager.kubernetes.io/mode=EnsureExists`. + +### Version 9.1.1 (Wed May 19 2020 Antoni Zawodny ) - Fix kube-addons.sh and kubectl permissions -## Version 9.1.0 (Wed May 13 2020 Antoni Zawodny ) +### Version 9.1.0 (Wed May 13 2020 Antoni Zawodny ) - Enable overriding the default list of whitelisted resources ### Version 9.0.2 (Thu August 1 2019 Maciej Borsz diff --git a/cluster/addons/addon-manager/Dockerfile b/cluster/addons/addon-manager/Dockerfile index e084f6578e2..fd8d5690392 100644 --- a/cluster/addons/addon-manager/Dockerfile +++ b/cluster/addons/addon-manager/Dockerfile @@ -17,6 +17,7 @@ FROM BASEIMAGE RUN clean-install bash ADD kube-addons.sh /opt/ +ADD kube-addons-main.sh /opt/ ADD kubectl /usr/local/bin/ -CMD ["/opt/kube-addons.sh"] +CMD ["/opt/kube-addons-main.sh"] diff --git a/cluster/addons/addon-manager/Makefile b/cluster/addons/addon-manager/Makefile index 709448a5627..5b990581e4d 100644 --- a/cluster/addons/addon-manager/Makefile +++ b/cluster/addons/addon-manager/Makefile @@ -15,7 +15,7 @@ IMAGE=staging-k8s.gcr.io/kube-addon-manager ARCH?=amd64 TEMP_DIR:=$(shell mktemp -d) -VERSION=v9.1.1 +VERSION=v9.1.2 KUBECTL_VERSION?=v1.13.2 BASEIMAGE=k8s.gcr.io/debian-base-$(ARCH):v1.0.0 @@ -29,7 +29,7 @@ all: build build: cp ./* $(TEMP_DIR) curl -sSL --retry 5 https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/linux/$(ARCH)/kubectl > $(TEMP_DIR)/kubectl - chmod a+rx $(TEMP_DIR)/kube-addons.sh $(TEMP_DIR)/kubectl + chmod a+rx $(TEMP_DIR)/kube-addons.sh $(TEMP_DIR)/kube-addons-main.sh $(TEMP_DIR)/kubectl cd $(TEMP_DIR) && sed -i.back "s|BASEIMAGE|$(BASEIMAGE)|g" Dockerfile ifneq ($(ARCH),amd64) @@ -48,5 +48,11 @@ ifeq ($(ARCH),amd64) docker push $(IMAGE):$(VERSION) endif +test: + cp ./* $(TEMP_DIR) + curl -sSL --retry 5 https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/linux/$(ARCH)/kubectl > $(TEMP_DIR)/kubectl + chmod a+rx $(TEMP_DIR)/kube-addons.sh $(TEMP_DIR)/kube-addons-test.sh $(TEMP_DIR)/kubectl + cd $(TEMP_DIR) && KUBECTL_BIN=$(TEMP_DIR)/kubectl ./kube-addons-test.sh + clean: docker rmi -f $(IMAGE)-$(ARCH):$(VERSION) diff --git a/cluster/addons/addon-manager/kube-addons-main.sh b/cluster/addons/addon-manager/kube-addons-main.sh new file mode 100755 index 00000000000..849973470d1 --- /dev/null +++ b/cluster/addons/addon-manager/kube-addons-main.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +# Copyright 2020 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Import required functions. The addon manager is installed to /opt in +# production use (see the Dockerfile) +# Disabling shellcheck following files as the full path would be required. +if [ -f "kube-addons.sh" ]; then + # shellcheck disable=SC1091 + source "kube-addons.sh" +elif [ -f "/opt/kube-addons.sh" ]; then + # shellcheck disable=SC1091 + source "/opt/kube-addons.sh" +else + # If the required source is missing, we have to fail. + log ERR "== Could not find kube-addons.sh (not in working directory or /opt) at $(date -Is) ==" + exit 1 +fi + +# The business logic for whether a given object should be created +# was already enforced by salt, and /etc/kubernetes/addons is the +# managed result of that. Start everything below that directory. +log INFO "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC} ==" + +# Wait for the default service account to be created in the kube-system namespace. +token_found="" +while [ -z "${token_found}" ]; do + sleep .5 + # shellcheck disable=SC2086 + # Disabling because "${KUBECTL_OPTS}" needs to allow for expansion here + if ! token_found=$(${KUBECTL} ${KUBECTL_OPTS} get --namespace="${SYSTEM_NAMESPACE}" serviceaccount default -o go-template="{{with index .secrets 0}}{{.name}}{{end}}"); then + token_found=""; + log WRN "== Error getting default service account, retry in 0.5 second ==" + fi +done + +log INFO "== Default service account in the ${SYSTEM_NAMESPACE} namespace has token ${token_found} ==" + +# Create admission_control objects if defined before any other addon services. If the limits +# are defined in a namespace other than default, we should still create the limits for the +# default namespace. +while IFS=$'\n' read -r obj; do + start_addon "${obj}" 100 10 default & + log INFO "++ obj ${obj} is created ++" +done < <(find /etc/kubernetes/admission-controls \( -name \*.yaml -o -name \*.json \)) + +# Start the apply loop. +# Check if the configuration has changed recently - in case the user +# created/updated/deleted the files on the master. +log INFO "== Entering periodical apply loop at $(date -Is) ==" +while true; do + start_sec=$(date +"%s") + if is_leader; then + ensure_addons + reconcile_addons + else + log INFO "Not elected leader, going back to sleep." + fi + end_sec=$(date +"%s") + len_sec=$((end_sec-start_sec)) + # subtract the time passed from the sleep time + if [[ ${len_sec} -lt ${ADDON_CHECK_INTERVAL_SEC} ]]; then + sleep_time=$((ADDON_CHECK_INTERVAL_SEC-len_sec)) + sleep ${sleep_time} + fi +done diff --git a/cluster/addons/addon-manager/kube-addons-test.sh b/cluster/addons/addon-manager/kube-addons-test.sh new file mode 100755 index 00000000000..a4cea8e2ad1 --- /dev/null +++ b/cluster/addons/addon-manager/kube-addons-test.sh @@ -0,0 +1,286 @@ +#!/usr/bin/env bash + +# Copyright 2020 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These tests enforce behavior of kube-addon-manager functions against a real +# cluster. A working Kubernetes cluster must be set up with kubectl configured. +# To run with the released version of kubectl, use `make test`. + +set -o errexit +set -o pipefail +set -o nounset + +# Default kubectl to the test users installation if needed. +KUBECTL_BIN="${KUBECTL_BIN:-kubectl}" + +# Disabling shellcheck following files as the full path would be required. +# shellcheck disable=SC1091 +source "kube-addons.sh" + +TEST_NS="kube-addon-manager-test" + +function retry() { + local tries=10 + while [ "${tries}" -gt 0 ]; do + "$@" && return 0; + (( tries-- )) + sleep 1 + done +} + +function setup(){ + retry kubectl create namespace "${TEST_NS}" +} + +function teardown() { + retry kubectl delete namespace "${TEST_NS}" +} + +function error() { + echo -e "\e[31m$*\e[0m" +} + +function echo_green() { + echo -e "\e[32m$*\e[0m" +} + +function echo_blue() { + echo -e "\e[34m$*\e[0m" +} + +function test_create_resource_reconcile() { + local limitrange + read -r -d '' limitrange << EOF +apiVersion: "v1" +kind: "LimitRange" +metadata: + name: "limits" + namespace: "${TEST_NS}" + labels: + addonmanager.kubernetes.io/mode: Reconcile +spec: + limits: + - type: "Container" + defaultRequest: + cpu: "100m" +EOF + + # arguments are yaml text, number of tries, delay, name of file, and namespace + echo_blue "Creating initial resource to test Reconcile mode" + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if ! (kubectl get limits/limits -n "${TEST_NS}"); then + error "failed to create limits w/ reconcile" + return 1 + elif ! (kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"); then + error "limits does not match applied config" + return 1 + fi + + # Changes to addons with mode reconcile should be reflected. + echo_blue "Changes to manifest should be reflected in the cluster" + limitrange="${limitrange//100m/50m}" + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"; then + error "failed to update resource, still has 100m" + return 1 + elif ! (kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "50m"); then + error "failed to update resource, 50m limit was not reflected" + return 1 + fi + + # Finally, the users configuration will not be respected. + echo_blue "Changes the user makes should be overwritten by kube-addon-manager" + EDITOR="sed -i 's/50m/600m/'" kubectl edit limits/limits -n ${TEST_NS} + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "50m"; then + error "failed to edit resource with sed -- test is broken" + return 1 + fi + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if ! ( kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "50m"); then + error "failed to update resource, user config was respected when it should have been rewritten" + return 1 + fi +} + +function test_create_resource_ensureexists() { + local limitrange + read -r -d '' limitrange << EOF +apiVersion: "v1" +kind: "LimitRange" +metadata: + name: "limits" + namespace: "${TEST_NS}" + labels: + addonmanager.kubernetes.io/mode: EnsureExists +spec: + limits: + - type: "Container" + defaultRequest: + cpu: "100m" +EOF + + # arguments are yaml text, number of tries, delay, name of file, and namespace + echo_blue "Creating initial resource to test mode EnsureExists" + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if ! (kubectl get limits/limits -n "${TEST_NS}"); then + error "failed to create limits w/ EnsureExists" + return 1 + elif ! (kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"); then + error "limits does not match applied config" + return 1 + fi + + # Changes to addons with mode EnsureExists should NOT be reflected. + echo_blue "Changes to the manifest should not be reconciled with the cluster" + limitrange="${limitrange//100m/50m}" + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "50m"; then + error "failed to respect existing resource, was overwritten despite EnsureExists" + return 1 + fi + + # the users configuration must be respected + echo_blue "User configuration will be persisted for EnsureExists" + EDITOR="sed -i 's/100m/600m/'" kubectl edit limits/limits -n ${TEST_NS} + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"; then + error "failed to edit resource with sed -- test is broken" + return 1 + fi + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"; then + error "failed to respect user changes to EnsureExists object" + return 1 + fi + + # unless they delete the object, in which case it should return + echo_blue "Missing EnsureExists resources will be re-created" + kubectl delete limits/limits -n ${TEST_NS} + if kubectl get limits/limits -n ${TEST_NS}; then + error "failed to delete limitrange" + return 1 + fi + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if ! kubectl get limits/limits -n ${TEST_NS}; then + error "failed to recreate deleted EnsureExists resource" + return 1 + fi +} + +function test_create_multiresource() { + local limitrange + read -r -d '' limitrange << EOF +apiVersion: "v1" +kind: "LimitRange" +metadata: + name: "limits" + namespace: "${TEST_NS}" + labels: + addonmanager.kubernetes.io/mode: EnsureExists +spec: + limits: + - type: "Container" + defaultRequest: + cpu: "100m" +--- +apiVersion: "v1" +kind: "LimitRange" +metadata: + name: "limits2" + namespace: "${TEST_NS}" + labels: + addonmanager.kubernetes.io/mode: Reconcile +spec: + limits: + - type: "Container" + defaultRequest: + cpu: "100m" +EOF + + # arguments are yaml text, number of tries, delay, name of file, and namespace + echo_blue "Creating initial resources from multi-resource manifest" + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if ! (kubectl get limits/limits -n "${TEST_NS}"); then + error "failed to create limits w/ EnsureExists" + return 1 + elif ! (kubectl get limits/limits2 -n "${TEST_NS}"); then + error "failed to create limits2 w/ Reconcile" + return 1 + fi + + # Changes to addons with mode EnsureExists should NOT be reflected. + # However, the mode=Reconcile addon should be changed. + echo_blue "Multi-resource manifest changes should apply to EnsureExists, not Reconcile" + limitrange="${limitrange//100m/50m}" + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "50m"; then + error "failed to respect existing resource, was overwritten despite EnsureExists" + return 1 + elif kubectl get limits/limits2 -n ${TEST_NS} | grep --silent "100m"; then + error "failed to update resource with mode Reconcile" + return 1 + fi + + # the users configuration must be respected for EnsureExists + echo_blue "Multi-resource manifest should not overwrite user config in EnsureExists" + EDITOR="sed -i 's/100m/600m/'" kubectl edit limits/limits -n ${TEST_NS} + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"; then + error "failed to edit resource with sed -- test is broken" + return 1 + fi + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if kubectl get limits/limits -n ${TEST_NS} -oyaml | grep --silent "100m"; then + error "failed to respect user changes to EnsureExists object" + return 1 + fi + + # But not for Reconcile. + echo_blue "Multi-resource manifest should overwrite user config in EnsureExists" + EDITOR="sed -i 's/50m/600m/'" kubectl edit limits/limits2 -n ${TEST_NS} + if kubectl get limits/limits2 -n ${TEST_NS} -oyaml | grep --silent "50m"; then + error "failed to edit resource with sed -- test is broken" + return 1 + fi + create_resource_from_string "${limitrange}" "10" "1" "limitrange.yaml" "${TEST_NS}" + if ! ( kubectl get limits/limits2 -n ${TEST_NS} -oyaml | grep --silent "50m"); then + error "failed to update resource, user config was respected when it should have been rewritten" + return 1 + fi +} + +function test_func() { + local -r name="${1}" + + echo_blue "=== TEST ${name}" + setup + if ! "${name}"; then + failures=$((failures+1)) + error "=== FAIL" + else + echo_green "=== PASS" + fi + teardown +} + +failures=0 +test_func test_create_resource_reconcile +test_func test_create_resource_ensureexists +test_func test_create_multiresource +if [ "${failures}" -gt 0 ]; then + error "no. failed tests: ${failures}" + error "FAIL" + exit 1 +else + echo_green "PASS" +fi diff --git a/cluster/addons/addon-manager/kube-addons.sh b/cluster/addons/addon-manager/kube-addons.sh index 8f4048e292e..b09575ad65f 100755 --- a/cluster/addons/addon-manager/kube-addons.sh +++ b/cluster/addons/addon-manager/kube-addons.sh @@ -57,9 +57,13 @@ else read -ra KUBECTL_PRUNE_WHITELIST <<< "${KUBECTL_PRUNE_WHITELIST_OVERRIDE}" fi +# This variable is unused in this file, but not in those that source it. +# shellcheck disable=SC2034 ADDON_CHECK_INTERVAL_SEC=${TEST_ADDON_CHECK_INTERVAL_SEC:-60} ADDON_PATH=${ADDON_PATH:-/etc/kubernetes/addons} +# This variable is unused in this file, but not in those that source it. +# shellcheck disable=SC2034 SYSTEM_NAMESPACE=kube-system # Addons could use this label with two modes: @@ -158,10 +162,8 @@ function create_resource_from_string() { local -r config_name=$4; local -r namespace=$5; while [ "${tries}" -gt 0 ]; do - # shellcheck disable=SC2086 - # Disabling because "${KUBECTL_OPTS}" needs to allow for expansion here - echo "${config_string}" | ${KUBECTL} ${KUBECTL_OPTS} --namespace="${namespace}" apply -f - && \ - log INFO "== Successfully started ${config_name} in namespace ${namespace} at $(date -Is)" && \ + reconcile_resource_from_string "${config_string}" "${config_name}" "${namespace}" && \ + ensure_resource_from_string "${config_string}" "${config_name}" "${namespace}" && \ return 0; (( tries-- )) log WRN "== Failed to start ${config_name} in namespace ${namespace} at $(date -Is). ${tries} tries remaining. ==" @@ -170,6 +172,63 @@ function create_resource_from_string() { return 1; } +# Creates resources with addon mode Reconcile for create_resource_from_string. +# Does not perform pruning. +# $1 string with json or yaml. +# $2 name of this object for logging +# $3 namespace for the object +function reconcile_resource_from_string() { + local -r config_string=$1; + local -r config_name=$2; + local -r namespace=$3; + + # kubectl_output must be declared ahead of time to allow capturing kubectl's exit code and not local's exit code. + local kubectl_output; + # shellcheck disable=SC2086 + # Disabling because "${KUBECTL_OPTS}" needs to allow for expansion here + kubectl_output=$(echo "${config_string}" | ${KUBECTL} ${KUBECTL_OPTS} apply -f - \ + --namespace="${namespace}" -l ${ADDON_MANAGER_LABEL}=Reconcile 2>&1) && \ + log INFO "== Successfully reconciled ${config_name} in namespace ${namespace} at $(date -Is)" && \ + return 0; + if echo "${kubectl_output}" | grep --silent "no objects"; then + # Nothing to do. + return 0; + fi + echo "${kubectl_output}" # for visibility of errors + return 1; +} + +# Creates resources with addon mode EnsureExists for create_resource_from_string. +# Does not perform pruning. +# $1 string with json or yaml. +# $2 name of this object for logging +# $3 namespace for the object +function ensure_resource_from_string() { + local -r config_string=$1; + local -r config_name=$2; + local -r namespace=$3; + + # Resources that are set to the addon mode EnsureExists should not be overwritten if they already exist. + local kubectl_output; + # shellcheck disable=SC2086 + # Disabling because "${KUBECTL_OPTS}" needs to allow for expansion here + kubectl_output=$(echo "${config_string}" | ${KUBECTL} ${KUBECTL_OPTS} create -f - \ + --namespace="${namespace}" -l ${ADDON_MANAGER_LABEL}=EnsureExists 2>&1) && \ + log INFO "== Successfully started ${config_name} in namespace ${namespace} at $(date -Is)" && \ + return 0; + # Detect an already exists failure for creating EnsureExists resources. + # All other errors should result in a retry. + if echo "${kubectl_output}" | grep --silent "AlreadyExists"; then + log INFO "== Skipping start ${config_name} in namespace ${namespace}, already exists at $(date -Is)" + return 0; + elif echo "${kubectl_output}" | grep --silent "no objects"; then + # Nothing to do. + return 0; + fi + echo "${kubectl_output}" # for visibility of errors + return 1; +} + function reconcile_addons() { # TODO: Remove the first command in future release. # Adding this for backward compatibility. Old addons have CLUSTER_SERVICE_LABEL=true and don't have @@ -226,51 +285,3 @@ function is_leader() { KUBE_CONTROLLER_MANAGER_LEADER="${KUBE_CONTROLLER_MANAGER_LEADER##${HOSTNAME}_*}" [[ "$KUBE_CONTROLLER_MANAGER_LEADER" == "" ]] } - -# The business logic for whether a given object should be created -# was already enforced by salt, and /etc/kubernetes/addons is the -# managed result of that. Start everything below that directory. -log INFO "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC} ==" - -# Wait for the default service account to be created in the kube-system namespace. -token_found="" -while [ -z "${token_found}" ]; do - sleep .5 - # shellcheck disable=SC2086 - # Disabling because "${KUBECTL_OPTS}" needs to allow for expansion here - if ! token_found=$(${KUBECTL} ${KUBECTL_OPTS} get --namespace="${SYSTEM_NAMESPACE}" serviceaccount default -o go-template="{{with index .secrets 0}}{{.name}}{{end}}"); then - token_found=""; - log WRN "== Error getting default service account, retry in 0.5 second ==" - fi -done - -log INFO "== Default service account in the ${SYSTEM_NAMESPACE} namespace has token ${token_found} ==" - -# Create admission_control objects if defined before any other addon services. If the limits -# are defined in a namespace other than default, we should still create the limits for the -# default namespace. -while IFS=$'\n' read -r obj; do - start_addon "${obj}" 100 10 default & - log INFO "++ obj ${obj} is created ++" -done < <(find /etc/kubernetes/admission-controls \( -name \*.yaml -o -name \*.json \)) - -# Start the apply loop. -# Check if the configuration has changed recently - in case the user -# created/updated/deleted the files on the master. -log INFO "== Entering periodical apply loop at $(date -Is) ==" -while true; do - start_sec=$(date +"%s") - if is_leader; then - ensure_addons - reconcile_addons - else - log INFO "Not elected leader, going back to sleep." - fi - end_sec=$(date +"%s") - len_sec=$((end_sec-start_sec)) - # subtract the time passed from the sleep time - if [[ ${len_sec} -lt ${ADDON_CHECK_INTERVAL_SEC} ]]; then - sleep_time=$((ADDON_CHECK_INTERVAL_SEC-len_sec)) - sleep ${sleep_time} - fi -done