From 018b11bc3a271691961bcd1e0170b699bd5ea43e Mon Sep 17 00:00:00 2001
From: Zach Loafman
Date: Wed, 11 Mar 2015 13:42:41 -0700
Subject: [PATCH] Retry object creation with --validate in kube-addons

The better solution is some fence with Salt, but the actual logs
provided in the bug don't support any race condition here, plus the
ordering in the Salt configuration seems correct.

We haven't seen this again in a while, but given the results of the
situation (a borked cluster), I'm proposing a relatively simple
workaround.

Fixes #4357 (dubiously)
---
 .../saltbase/salt/kube-addons/kube-addons.sh | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/cluster/saltbase/salt/kube-addons/kube-addons.sh b/cluster/saltbase/salt/kube-addons/kube-addons.sh
index 2cdb146a82f..593d57b93d9 100644
--- a/cluster/saltbase/salt/kube-addons/kube-addons.sh
+++ b/cluster/saltbase/salt/kube-addons/kube-addons.sh
@@ -17,10 +17,29 @@
 # The business logic for whether a given object should be created
 # was already enforced by salt, and /etc/kubernetes/addons is the
 # managed result is of that. Start everything below that directory.
-echo "== Kubernetes addon manager started at $(date -Is) =="
 KUBECTL=/usr/local/bin/kubectl
+
+# Creates the object described by the file $1 via kubectl, retrying up to
+# five times with a 5 second pause after each failed attempt.
+# Returns 0 as soon as a create succeeds, and 1 once every attempt has
+# failed so the exit status of a backgrounded call reflects the failure.
+function create-object() {
+  local obj=$1
+  local tries
+
+  for tries in {1..5}; do
+    if ${KUBECTL} --server="127.0.0.1:8080" create --validate=true -f "${obj}"; then
+      return 0
+    fi
+    echo "++ ${obj} failed, attempt ${tries} (sleeping 5) ++"
+    sleep 5
+  done
+  return 1
+}
+
+echo "== Kubernetes addon manager started at $(date -Is) =="
 for obj in $(find /etc/kubernetes/addons -name \*.yaml); do
-  ${KUBECTL} --server="127.0.0.1:8080" create -f ${obj} &
+  create-object "${obj}" &
   echo "++ addon ${obj} started in pid $! ++"
 done
 noerrors="true"