From 7873e252f002412cebda3f6523bb6d005afa1200 Mon Sep 17 00:00:00 2001 From: Marek Biskup Date: Tue, 23 Jun 2015 16:57:12 +0200 Subject: [PATCH] addon updater should not retry too many times because specs may be invalid --- .../salt/kube-addons/kube-addon-update.sh | 46 +++++++++++++++---- .../saltbase/salt/kube-addons/kube-addons.sh | 14 ++++-- test/e2e/addon_update.go | 6 ++- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/cluster/saltbase/salt/kube-addons/kube-addon-update.sh b/cluster/saltbase/salt/kube-addons/kube-addon-update.sh index 12293f728fa..dec1e7ce438 100755 --- a/cluster/saltbase/salt/kube-addons/kube-addon-update.sh +++ b/cluster/saltbase/salt/kube-addons/kube-addon-update.sh @@ -45,16 +45,19 @@ # global config KUBECTL=${TEST_KUBECTL:-/usr/local/bin/kubectl} # substitute for tests -NUM_TRIES_FOR_CREATE=${TEST_NUM_TRIES:-100} -DELAY_AFTER_CREATE_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10} -NUM_TRIES_FOR_STOP=${TEST_NUM_TRIES:-100} -DELAY_AFTER_STOP_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10} - if [[ ! -x ${KUBECTL} ]]; then echo "ERROR: kubectl command (${KUBECTL}) not found or is not executable" 1>&2 exit 1 fi +# If an add-on definition is incorrect, or a definition has just disappeared +# from the local directory, the script will still keep on retrying. +# The script does not end until all retries are done, so +# one invalid manifest may block updates of other add-ons. +# Be careful how you set these parameters +NUM_TRIES=1 # will be updated based on input parameters +DELAY_AFTER_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10} + # remember that you can't log from functions that print some output (because # logs are also printed on stdout) @@ -227,14 +230,16 @@ function stop-object() { local -r obj_type=$1 local -r obj_name=$2 log INFO "Stopping ${obj_type} ${obj_name}" - run-until-success "${KUBECTL} stop ${obj_type} ${obj_name}" ${NUM_TRIES_FOR_STOP} ${DELAY_AFTER_STOP_ERROR_SEC} + run-until-success "${KUBECTL} stop ${obj_type} ${obj_name}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC} } function create-object() { local -r obj_type=$1 local -r file_path=$2 log INFO "Creating new ${obj_type} from file ${file_path}" - run-until-success "${KUBECTL} create -f ${file_path}" ${NUM_TRIES_FOR_CREATE} ${DELAY_AFTER_CREATE_ERROR_SEC} + # this will keep on failing if the ${file_path} disappeared in the meantime. + # Do not use too many retries. + run-until-success "${KUBECTL} create -f ${file_path}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC} } function update-object() { @@ -266,6 +271,12 @@ function create-objects() { local -r file_paths=$2 local file_path for file_path in ${file_paths}; do + # Remember that the file may have disappear by now + # But we don't want to check it here because + # such race condition may always happen after + # we check it. Let's have the race + # condition happen a bit more often so that + # we see that our tests pass anyway. create-object ${obj_type} ${file_path} & done } @@ -363,6 +374,11 @@ function match-objects() { log DB3 "matched_files=${matched_files}" + + # note that if the addon file is invalid (or got removed after listing files + # but before we managed to match it) it will not be matched to any + # of the existing objects. So we will treat it as a new file + # and try to create its object. for addon_path in ${addon_paths_in_files}; do echo ${matched_files} | grep "${addon_path}" >/dev/null if [[ $? -ne 0 ]]; then @@ -433,11 +449,21 @@ function update-addons() { fi } -if [[ $# -ne 1 ]]; then - echo "Illegal number of parameters" 1>&2 +# input parameters: +# $1 input directory +# $2 retry period in seconds - the script will retry api-server errors for approximately +# this amound of time (it is not very precise), at interval equal $DELAY_AFTER_ERROR_SEC. +# + +if [[ $# -ne 2 ]]; then + echo "Illegal number of parameters. Usage $0 addon-dir [retry-period]" 1>&2 exit 1 fi +NUM_TRIES=$(($2 / ${DELAY_AFTER_ERROR_SEC})) +if [[ ${NUM_TRIES} -le 0 ]]; then + NUM_TRIES=1 +fi + addon_path=$1 update-addons ${addon_path} - diff --git a/cluster/saltbase/salt/kube-addons/kube-addons.sh b/cluster/saltbase/salt/kube-addons/kube-addons.sh index 969ae469266..1ecc2bdd25f 100644 --- a/cluster/saltbase/salt/kube-addons/kube-addons.sh +++ b/cluster/saltbase/salt/kube-addons/kube-addons.sh @@ -120,7 +120,7 @@ function create-resource-from-string() { # The business logic for whether a given object should be created # was already enforced by salt, and /etc/kubernetes/addons is the # managed result is of that. Start everything below that directory. -echo "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC}==" +echo "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC} ==" # Load the kube-env, which has all the environment variables we care # about, in a flat yaml format. @@ -150,6 +150,7 @@ echo "== default service account has token ${token_found} ==" # NOTE: needs to run as root to read this file. # Read each line in the csv file of tokens. # Expect errors when the script is started again. +# NOTE: secrets are created asynchronously, in background. while read line; do # Split each line into the token and username. IFS=',' read -a parts <<< "${line}" @@ -176,7 +177,14 @@ done # Check if the configuration has changed recently - in case the user # created/updated/deleted the files on the master. while true; do + start_sec=$(date +"%s") #kube-addon-update.sh must be deployed in the same directory as this file - `dirname $0`/kube-addon-update.sh /etc/kubernetes/addons - sleep $ADDON_CHECK_INTERVAL_SEC + `dirname $0`/kube-addon-update.sh /etc/kubernetes/addons ${ADDON_CHECK_INTERVAL_SEC} + end_sec=$(date +"%s") + len_sec=$((${end_sec}-${start_sec})) + # subtract the time passed from the sleep time + if [[ ${len_sec} -lt ${ADDON_CHECK_INTERVAL_SEC} ]]; then + sleep_time=$((${ADDON_CHECK_INTERVAL_SEC}-${len_sec})) + sleep ${sleep_time} + fi done diff --git a/test/e2e/addon_update.go b/test/e2e/addon_update.go index f6f4d3659a5..2c2710be114 100644 --- a/test/e2e/addon_update.go +++ b/test/e2e/addon_update.go @@ -181,7 +181,7 @@ spec: ` var addonTestPollInterval = 3 * time.Second -var addonTestPollTimeout = 3 * time.Minute +var addonTestPollTimeout = 1 * time.Minute var addonNamespace = api.NamespaceDefault // addons are in the default namespace type stringPair struct { @@ -211,6 +211,8 @@ var _ = Describe("Addon update", func() { namespace, err = createTestingNS("addon-update-test", c) Expect(err).NotTo(HaveOccurred()) + // Reduce the addon update intervals so that we have faster response + // to changes in the addon directory. // do not use "service" command because it clears the environment variables sshExecAndVerify(sshClient, "sudo TEST_ADDON_CHECK_INTERVAL_SEC=1 /etc/init.d/kube-addons restart") }) @@ -361,7 +363,7 @@ func getSSHClient() (*ssh.Client, error) { func sshExecAndVerify(client *ssh.Client, cmd string) { _, _, rc, err := sshExec(client, cmd) Expect(err).NotTo(HaveOccurred()) - Expect(rc).To(Equal(0)) + Expect(rc).To(Equal(0), "error return code from executing command on the cluster: %s", cmd) } func sshExec(client *ssh.Client, cmd string) (string, string, int, error) {