mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-13 13:55:41 +00:00
Allow some NotReady nodes in 1000 node clusters
This commit is contained in:
parent
3e04a45a95
commit
6aaabc6f46
@ -15,6 +15,10 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# Validates that the cluster is healthy.
|
# Validates that the cluster is healthy.
|
||||||
|
# Error codes are:
|
||||||
|
# 0 - success
|
||||||
|
# 1 - fatal (cluster is unlikely to work)
|
||||||
|
# 2 - non-fatal (encountered some errors, but cluster should be working correctly)
|
||||||
|
|
||||||
set -o errexit
|
set -o errexit
|
||||||
set -o nounset
|
set -o nounset
|
||||||
@ -29,11 +33,14 @@ fi
|
|||||||
source "${KUBE_ROOT}/cluster/kube-env.sh"
|
source "${KUBE_ROOT}/cluster/kube-env.sh"
|
||||||
source "${KUBE_ROOT}/cluster/kube-util.sh"
|
source "${KUBE_ROOT}/cluster/kube-util.sh"
|
||||||
|
|
||||||
|
ALLOWED_NOTREADY_NODES="${ALLOWED_NOTREADY_NODES:-0}"
|
||||||
|
|
||||||
EXPECTED_NUM_NODES="${NUM_NODES}"
|
EXPECTED_NUM_NODES="${NUM_NODES}"
|
||||||
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
|
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
|
||||||
EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+1))
|
EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+1))
|
||||||
fi
|
fi
|
||||||
# Make several attempts to deal with slow cluster birth.
|
# Make several attempts to deal with slow cluster birth.
|
||||||
|
return_value=0
|
||||||
attempt=0
|
attempt=0
|
||||||
while true; do
|
while true; do
|
||||||
# The "kubectl get nodes -o template" exports node information.
|
# The "kubectl get nodes -o template" exports node information.
|
||||||
@ -59,7 +66,12 @@ while true; do
|
|||||||
if (( attempt > 100 )); then
|
if (( attempt > 100 )); then
|
||||||
echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be fully functional.${color_norm}"
|
echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be fully functional.${color_norm}"
|
||||||
"${KUBE_ROOT}/cluster/kubectl.sh" get nodes
|
"${KUBE_ROOT}/cluster/kubectl.sh" get nodes
|
||||||
exit 2
|
if [ "$((${EXPECTED_NUM_NODES} - ${found}))" -gt "${ALLOWED_NOTREADY_NODES}" ]; then
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
return_value=2
|
||||||
|
break
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
echo -e "${color_yellow}Waiting for ${EXPECTED_NUM_NODES} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
|
echo -e "${color_yellow}Waiting for ${EXPECTED_NUM_NODES} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
|
||||||
fi
|
fi
|
||||||
@ -99,4 +111,10 @@ done
|
|||||||
|
|
||||||
echo "Validate output:"
|
echo "Validate output:"
|
||||||
"${KUBE_ROOT}/cluster/kubectl.sh" get cs
|
"${KUBE_ROOT}/cluster/kubectl.sh" get cs
|
||||||
|
if [ "${return_value}" == "0" ]; then
|
||||||
echo -e "${color_green}Cluster validation succeeded${color_norm}"
|
echo -e "${color_green}Cluster validation succeeded${color_norm}"
|
||||||
|
else
|
||||||
|
echo -e "${color_yellow}Cluster validation encountered some problems, but cluster should be in working order${color_norm}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit "${return_value}"
|
||||||
|
@ -508,6 +508,7 @@ case ${JOB_NAME} in
|
|||||||
|
|
||||||
# Runs the performance/scalability test on huge 1000-node cluster on GCE.
|
# Runs the performance/scalability test on huge 1000-node cluster on GCE.
|
||||||
# Flannel is used as network provider.
|
# Flannel is used as network provider.
|
||||||
|
# Allows a couple of nodes to be NotReady during startup
|
||||||
kubernetes-e2e-gce-enormous-cluster)
|
kubernetes-e2e-gce-enormous-cluster)
|
||||||
: ${E2E_CLUSTER_NAME:="jenkins-gce-enormous-cluster"}
|
: ${E2E_CLUSTER_NAME:="jenkins-gce-enormous-cluster"}
|
||||||
: ${E2E_NETWORK:="e2e-enormous-cluster"}
|
: ${E2E_NETWORK:="e2e-enormous-cluster"}
|
||||||
@ -526,6 +527,32 @@ case ${JOB_NAME} in
|
|||||||
NODE_SIZE="n1-standard-1"
|
NODE_SIZE="n1-standard-1"
|
||||||
NODE_DISK_SIZE="50GB"
|
NODE_DISK_SIZE="50GB"
|
||||||
NUM_NODES="1000"
|
NUM_NODES="1000"
|
||||||
|
ALLOWED_NOTREADY_NODES="2"
|
||||||
|
# Reduce logs verbosity
|
||||||
|
TEST_CLUSTER_LOG_LEVEL="--v=1"
|
||||||
|
# Increase resync period to simulate production
|
||||||
|
TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h"
|
||||||
|
;;
|
||||||
|
|
||||||
|
# Starts and tears down 1000-node cluster on GCE using flannel networking
|
||||||
|
# Requires all 1000 nodes to come up.
|
||||||
|
kubernetes-e2e-gce-enormous-startup)
|
||||||
|
: ${E2E_CLUSTER_NAME:="jenkins-gce-enormous-startup"}
|
||||||
|
# TODO: increase the quota for networks in kubernetes-scale and move this test to its own network
|
||||||
|
: ${E2E_NETWORK:="e2e-enormous-cluster"}
|
||||||
|
: ${E2E_TEST:="false"}
|
||||||
|
: ${KUBE_GCE_INSTANCE_PREFIX:="e2e-enormous-startup"}
|
||||||
|
: ${PROJECT:="kubernetes-scale"}
|
||||||
|
# Override GCE defaults.
|
||||||
|
NETWORK_PROVIDER="flannel"
|
||||||
|
# Temporarily switch off Heapster, as this will not schedule anywhere.
|
||||||
|
# TODO: Think of a solution to enable it.
|
||||||
|
ENABLE_CLUSTER_MONITORING="none"
|
||||||
|
E2E_ZONE="asia-east1-a"
|
||||||
|
MASTER_SIZE="n1-standard-32"
|
||||||
|
NODE_SIZE="n1-standard-1"
|
||||||
|
NODE_DISK_SIZE="50GB"
|
||||||
|
NUM_NODES="1000"
|
||||||
# Reduce logs verbosity
|
# Reduce logs verbosity
|
||||||
TEST_CLUSTER_LOG_LEVEL="--v=1"
|
TEST_CLUSTER_LOG_LEVEL="--v=1"
|
||||||
# Increase resync period to simulate production
|
# Increase resync period to simulate production
|
||||||
@ -900,6 +927,7 @@ export KUBE_GCE_NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-}
|
|||||||
export KUBE_OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION:-}
|
export KUBE_OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION:-}
|
||||||
export GCE_SERVICE_ACCOUNT=$(gcloud auth list 2> /dev/null | grep active | cut -f3 -d' ')
|
export GCE_SERVICE_ACCOUNT=$(gcloud auth list 2> /dev/null | grep active | cut -f3 -d' ')
|
||||||
export FAIL_ON_GCP_RESOURCE_LEAK="${FAIL_ON_GCP_RESOURCE_LEAK:-false}"
|
export FAIL_ON_GCP_RESOURCE_LEAK="${FAIL_ON_GCP_RESOURCE_LEAK:-false}"
|
||||||
|
export ALLOWED_NOTREADY_NODES=${ALLOWED_NOTREADY_NODES:-}
|
||||||
|
|
||||||
# GKE variables
|
# GKE variables
|
||||||
export CLUSTER_NAME=${E2E_CLUSTER_NAME}
|
export CLUSTER_NAME=${E2E_CLUSTER_NAME}
|
||||||
|
Loading…
Reference in New Issue
Block a user