CI: Introduce retry mechanism for kubectl in gha-run.sh
Frequent errors have been observed during k8s e2e tests:

- The connection to the server 127.0.0.1:6443 was refused - did you specify the right host or port?
- Error from server (ServiceUnavailable): the server is currently unable to handle the request
- Error from server (NotFound): the server could not find the requested resource

These errors can be resolved by retrying the kubectl command. This commit introduces a wrapper function in common.sh that runs kubectl up to 3 times with a 5-second interval. Initially, this change only covers gha-run.sh for Kubernetes.

Signed-off-by: Hyounggyu Choi <Hyounggyu.Choi@ibm.com>
This commit is contained in: commit 8ff128dda8 (parent b30d085271)
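For illustration, a minimal sketch of how a test script uses the new wrapper once common.sh is sourced; the source path and the manifest name below are placeholders for this example, not files touched by the commit:

    #!/usr/bin/env bash
    # Placeholder path: in the repo, gha-run.sh obtains kubectl_retry via common.sh.
    source "./common.sh"

    # Returns 0 on the first successful attempt; otherwise retries up to
    # 3 times with a 5-second pause between attempts and then returns 1.
    kubectl_retry get runtimeclass
    kubectl_retry apply -f "example-manifest.yaml" || echo "apply still failing after retries" 1>&2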
@@ -80,6 +80,22 @@ function handle_error() {
 }
 trap 'handle_error $LINENO' ERR
 
+# A wrapper function for kubectl with retry logic
+# runs the command up to 3 times with a 5-second interval
+# to ensure successful execution
+function kubectl_retry() {
+    local max_tries=3
+    local interval=5
+    local i=0
+    while true; do
+        kubectl $@ && return 0 || true
+        i=$((i + 1))
+        [ $i -lt $max_tries ] && echo "'kubectl $@' failed, retrying in $interval seconds" 1>&2 || break
+        sleep $interval
+    done
+    echo "'kubectl $@' failed after $max_tries tries" 1>&2 && return 1
+}
+
 function waitForProcess() {
     wait_time="$1"
     sleep_time="$2"
@@ -220,12 +220,12 @@ function deploy_kata() {
     grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image"
     echo "::endgroup::"
 
-    kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
+    kubectl_retry apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
     case "${KUBERNETES}" in
-        k0s) kubectl apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k0s" ;;
+        k0s) kubectl_retry apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k0s" ;;
-        k3s) kubectl apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s" ;;
+        k3s) kubectl_retry apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s" ;;
-        rke2) kubectl apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/rke2" ;;
+        rke2) kubectl_retry apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/rke2" ;;
-        *) kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+        *) kubectl_retry apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
     esac
 
     local cmd="kubectl -n kube-system get -l name=kata-deploy pod 2>/dev/null | grep '\<Running\>'"
@@ -241,11 +241,11 @@ function deploy_kata() {
     fi
 
     echo "::group::kata-deploy logs"
-    kubectl -n kube-system logs --tail=100 -l name=kata-deploy
+    kubectl_retry -n kube-system logs --tail=100 -l name=kata-deploy
     echo "::endgroup::"
 
     echo "::group::Runtime classes"
-    kubectl get runtimeclass
+    kubectl_retry get runtimeclass
     echo "::endgroup::"
 }
 
@@ -395,7 +395,7 @@ function cleanup_kata_deploy() {
     esac
 
     # shellcheck disable=2086
-    kubectl delete ${deploy_spec}
+    kubectl_retry delete --ignore-not-found ${deploy_spec}
     kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
 
     # Let the `kata-deploy` script take care of the runtime class creation / removal
@@ -419,12 +419,12 @@ function cleanup_kata_deploy() {
     cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
     grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image"
     # shellcheck disable=2086
-    kubectl apply ${cleanup_spec}
+    kubectl_retry apply ${cleanup_spec}
     sleep 180s
 
     # shellcheck disable=2086
-    kubectl delete ${cleanup_spec}
+    kubectl_retry delete --ignore-not-found ${cleanup_spec}
-    kubectl delete -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
+    kubectl_retry delete --ignore-not-found -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
 }
 
 function cleanup() {
@@ -513,11 +513,11 @@ function deploy_nydus_snapshotter() {
         misc/snapshotter/base/nydus-snapshotter.yaml
 
     # Deploy nydus snapshotter as a daemonset
-    kubectl create -f "misc/snapshotter/nydus-snapshotter-rbac.yaml"
+    kubectl_retry create -f "misc/snapshotter/nydus-snapshotter-rbac.yaml"
     if [ "${KUBERNETES}" = "k3s" ]; then
-        kubectl apply -k "misc/snapshotter/overlays/k3s"
+        kubectl_retry apply -k "misc/snapshotter/overlays/k3s"
     else
-        kubectl apply -f "misc/snapshotter/base/nydus-snapshotter.yaml"
+        kubectl_retry apply -f "misc/snapshotter/base/nydus-snapshotter.yaml"
     fi
     popd
 
@@ -525,9 +525,9 @@ function deploy_nydus_snapshotter() {
 
     echo "::endgroup::"
     echo "::group::nydus snapshotter logs"
-    pods_name=$(kubectl get pods --selector=app=nydus-snapshotter -n nydus-system -o=jsonpath='{.items[*].metadata.name}')
+    pods_name=$(kubectl_retry get pods --selector=app=nydus-snapshotter -n nydus-system -o=jsonpath='{.items[*].metadata.name}')
-    kubectl logs "${pods_name}" -n nydus-system
+    kubectl_retry logs "${pods_name}" -n nydus-system
-    kubectl describe pod "${pods_name}" -n nydus-system
+    kubectl_retry describe pod "${pods_name}" -n nydus-system
     echo "::endgroup::"
 }
 
@@ -542,13 +542,13 @@ function cleanup_nydus_snapshotter() {
     pushd "$nydus_snapshotter_install_dir"
 
     if [ "${KUBERNETES}" = "k3s" ]; then
-        kubectl delete -k "misc/snapshotter/overlays/k3s"
+        kubectl_retry delete --ignore-not-found -k "misc/snapshotter/overlays/k3s"
     else
-        kubectl delete -f "misc/snapshotter/base/nydus-snapshotter.yaml"
+        kubectl_retry delete --ignore-not-found -f "misc/snapshotter/base/nydus-snapshotter.yaml"
     fi
     sleep 180s
-    kubectl delete -f "misc/snapshotter/nydus-snapshotter-rbac.yaml"
+    kubectl_retry delete --ignore-not-found -f "misc/snapshotter/nydus-snapshotter-rbac.yaml"
-    kubectl get namespace nydus-system -o json | jq 'del(.spec.finalizers)' | kubectl replace --raw "/api/v1/namespaces/nydus-system/finalize" -f - || true
+    kubectl_retry get namespace nydus-system -o json | jq 'del(.spec.finalizers)' | kubectl_retry replace --raw "/api/v1/namespaces/nydus-system/finalize" -f - || true
     popd
     sleep 30s
     echo "::endgroup::"