mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 02:41:25 +00:00
Merge pull request #36999 from jszczepkowski/ha-e2e-onerepl
Automatic merge from submit-queue. Fixed e2e tests for HA master. A set of fixes that allows HA master e2e tests to pass for removal/addition of master replicas. Summary of changes: - fixed the host name in etcd certs, - added cluster validation after kube-down, - fixed the number of master replicas in cluster validation, - made MULTIZONE=true required for HA master deployments and ensured MULTIZONE=true is handled correctly when the user wants to create an HA master but not kubelets in multiple zones, - extended verification of master replicas in HA master e2e tests.
This commit is contained in:
commit
acb8a3f7d5
@ -56,7 +56,7 @@ function replicate-master-instance() {
|
|||||||
ETCD_CA_KEY="$(echo "${kube_env}" | grep "ETCD_CA_KEY" | sed "s/^.*: '//" | sed "s/'$//")"
|
ETCD_CA_KEY="$(echo "${kube_env}" | grep "ETCD_CA_KEY" | sed "s/^.*: '//" | sed "s/'$//")"
|
||||||
ETCD_CA_CERT="$(echo "${kube_env}" | grep "ETCD_CA_CERT" | sed "s/^.*: '//" | sed "s/'$//")"
|
ETCD_CA_CERT="$(echo "${kube_env}" | grep "ETCD_CA_CERT" | sed "s/^.*: '//" | sed "s/'$//")"
|
||||||
|
|
||||||
create-etcd-certs "${ETCD_CA_CERT}" "${ETCD_CA_KEY}"
|
create-etcd-certs "${REPLICA_NAME}" "${ETCD_CA_CERT}" "${ETCD_CA_KEY}"
|
||||||
|
|
||||||
kube_env="$(echo "${kube_env}" | grep -v "ETCD_PEER_KEY")"
|
kube_env="$(echo "${kube_env}" | grep -v "ETCD_PEER_KEY")"
|
||||||
kube_env="$(echo -e "${kube_env}\nETCD_PEER_KEY: '${ETCD_PEER_KEY_BASE64}'")"
|
kube_env="$(echo -e "${kube_env}\nETCD_PEER_KEY: '${ETCD_PEER_KEY_BASE64}'")"
|
||||||
|
@ -52,7 +52,7 @@ function replicate-master-instance() {
|
|||||||
ETCD_CA_KEY="$(echo "${kube_env}" | grep "ETCD_CA_KEY" | sed "s/^.*: '//" | sed "s/'$//")"
|
ETCD_CA_KEY="$(echo "${kube_env}" | grep "ETCD_CA_KEY" | sed "s/^.*: '//" | sed "s/'$//")"
|
||||||
ETCD_CA_CERT="$(echo "${kube_env}" | grep "ETCD_CA_CERT" | sed "s/^.*: '//" | sed "s/'$//")"
|
ETCD_CA_CERT="$(echo "${kube_env}" | grep "ETCD_CA_CERT" | sed "s/^.*: '//" | sed "s/'$//")"
|
||||||
|
|
||||||
create-etcd-certs "${ETCD_CA_CERT}" "${ETCD_CA_KEY}"
|
create-etcd-certs "${REPLICA_NAME}" "${ETCD_CA_CERT}" "${ETCD_CA_KEY}"
|
||||||
|
|
||||||
kube_env="$(echo "${kube_env}" | grep -v "ETCD_PEER_KEY")"
|
kube_env="$(echo "${kube_env}" | grep -v "ETCD_PEER_KEY")"
|
||||||
kube_env="$(echo -e "${kube_env}\nETCD_PEER_KEY: '${ETCD_PEER_KEY_BASE64}'")"
|
kube_env="$(echo -e "${kube_env}\nETCD_PEER_KEY: '${ETCD_PEER_KEY_BASE64}'")"
|
||||||
|
@ -747,8 +747,9 @@ function get-master-disk-size() {
|
|||||||
# KUBE_TEMP: temporary directory
|
# KUBE_TEMP: temporary directory
|
||||||
#
|
#
|
||||||
# Args:
|
# Args:
|
||||||
# $1: CA certificate
|
# $1: host name
|
||||||
# $2: CA key
|
# $2: CA certificate
|
||||||
|
# $3: CA key
|
||||||
#
|
#
|
||||||
# If CA cert/key is empty, the function will also generate certs for CA.
|
# If CA cert/key is empty, the function will also generate certs for CA.
|
||||||
#
|
#
|
||||||
@ -759,8 +760,9 @@ function get-master-disk-size() {
|
|||||||
# ETCD_PEER_CERT_BASE64
|
# ETCD_PEER_CERT_BASE64
|
||||||
#
|
#
|
||||||
function create-etcd-certs {
|
function create-etcd-certs {
|
||||||
local ca_cert=${1:-}
|
local host=${1}
|
||||||
local ca_key=${2:-}
|
local ca_cert=${2:-}
|
||||||
|
local ca_key=${3:-}
|
||||||
|
|
||||||
mkdir -p "${KUBE_TEMP}/cfssl"
|
mkdir -p "${KUBE_TEMP}/cfssl"
|
||||||
pushd "${KUBE_TEMP}/cfssl"
|
pushd "${KUBE_TEMP}/cfssl"
|
||||||
@ -810,8 +812,8 @@ EOF
|
|||||||
./cfssl gencert -initca ca-csr.json | ./cfssljson -bare ca -
|
./cfssl gencert -initca ca-csr.json | ./cfssljson -bare ca -
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo '{"CN":"'"${MASTER_NAME}"'","hosts":[""],"key":{"algo":"ecdsa","size":256}}' \
|
echo '{"CN":"'"${host}"'","hosts":[""],"key":{"algo":"ecdsa","size":256}}' \
|
||||||
| ./cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=client-server -hostname="${MASTER_NAME}" - \
|
| ./cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=client-server -hostname="${host}" - \
|
||||||
| ./cfssljson -bare etcd
|
| ./cfssljson -bare etcd
|
||||||
|
|
||||||
ETCD_CA_KEY_BASE64=$(cat "ca-key.pem" | base64 | tr -d '\r\n')
|
ETCD_CA_KEY_BASE64=$(cat "ca-key.pem" | base64 | tr -d '\r\n')
|
||||||
@ -878,7 +880,7 @@ function create-master() {
|
|||||||
MASTER_ADVERTISE_ADDRESS="${MASTER_RESERVED_IP}"
|
MASTER_ADVERTISE_ADDRESS="${MASTER_RESERVED_IP}"
|
||||||
|
|
||||||
create-certs "${MASTER_RESERVED_IP}"
|
create-certs "${MASTER_RESERVED_IP}"
|
||||||
create-etcd-certs
|
create-etcd-certs ${MASTER_NAME}
|
||||||
|
|
||||||
# Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance
|
# Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance
|
||||||
get-master-root-disk-size
|
get-master-root-disk-size
|
||||||
@ -904,7 +906,7 @@ function add-replica-to-etcd() {
|
|||||||
--project "${PROJECT}" \
|
--project "${PROJECT}" \
|
||||||
--zone "${EXISTING_MASTER_ZONE}" \
|
--zone "${EXISTING_MASTER_ZONE}" \
|
||||||
--command \
|
--command \
|
||||||
"curl localhost:${client_port}/v2/members -XPOST -H \"Content-Type: application/json\" -d '{\"peerURLs\":[\"https://${REPLICA_NAME}:${internal_port}\"]}'"
|
"curl localhost:${client_port}/v2/members -XPOST -H \"Content-Type: application/json\" -d '{\"peerURLs\":[\"https://${REPLICA_NAME}:${internal_port}\"]}' -s"
|
||||||
return $?
|
return $?
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1381,7 +1383,7 @@ function kube-down() {
|
|||||||
if [[ "${REMAINING_MASTER_COUNT}" == "1" ]]; then
|
if [[ "${REMAINING_MASTER_COUNT}" == "1" ]]; then
|
||||||
if gcloud compute forwarding-rules describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
|
if gcloud compute forwarding-rules describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
|
||||||
detect-master
|
detect-master
|
||||||
local REMAINING_REPLICA_NAME="$(get-replica-name)"
|
local REMAINING_REPLICA_NAME="$(get-all-replica-names)"
|
||||||
local REMAINING_REPLICA_ZONE=$(gcloud compute instances list "${REMAINING_REPLICA_NAME}" \
|
local REMAINING_REPLICA_ZONE=$(gcloud compute instances list "${REMAINING_REPLICA_NAME}" \
|
||||||
--project "${PROJECT}" --format="value(zone)")
|
--project "${PROJECT}" --format="value(zone)")
|
||||||
gcloud compute forwarding-rules delete \
|
gcloud compute forwarding-rules delete \
|
||||||
@ -1476,6 +1478,21 @@ function kube-down() {
|
|||||||
# If there are no more remaining master replicas, we should update kubeconfig.
|
# If there are no more remaining master replicas, we should update kubeconfig.
|
||||||
export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}"
|
export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}"
|
||||||
clear-kubeconfig
|
clear-kubeconfig
|
||||||
|
else
|
||||||
|
# If some master replicas remain: cluster has been changed, we need to re-validate it.
|
||||||
|
echo "... calling validate-cluster" >&2
|
||||||
|
# Override errexit
|
||||||
|
(validate-cluster) && validate_result="$?" || validate_result="$?"
|
||||||
|
|
||||||
|
# We have two different failure modes from validate cluster:
|
||||||
|
# - 1: fatal error - cluster won't be working correctly
|
||||||
|
# - 2: weak error - something went wrong, but cluster probably will be working correctly
|
||||||
|
# We just print an error message in case 2).
|
||||||
|
if [[ "${validate_result}" == "1" ]]; then
|
||||||
|
exit 1
|
||||||
|
elif [[ "${validate_result}" == "2" ]]; then
|
||||||
|
echo "...ignoring non-fatal errors in validate-cluster" >&2
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
set -e
|
set -e
|
||||||
}
|
}
|
||||||
@ -1511,6 +1528,19 @@ function get-all-replica-names() {
|
|||||||
--format "value(name)" | tr "\n" "," | sed 's/,$//')
|
--format "value(name)" | tr "\n" "," | sed 's/,$//')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Prints the number of all of the master replicas in all zones.
|
||||||
|
#
|
||||||
|
# Assumed vars:
|
||||||
|
# MASTER_NAME
|
||||||
|
function get-master-replicas-count() {
|
||||||
|
detect-project
|
||||||
|
local num_masters=$(gcloud compute instances list \
|
||||||
|
--project "${PROJECT}" \
|
||||||
|
--regexp "$(get-replica-name-regexp)" \
|
||||||
|
--format "value(zone)" | wc -l)
|
||||||
|
echo -n "${num_masters}"
|
||||||
|
}
|
||||||
|
|
||||||
# Prints regexp for full master machine name. In a cluster with replicated master,
|
# Prints regexp for full master machine name. In a cluster with replicated master,
|
||||||
# VM names may either be MASTER_NAME or MASTER_NAME with a suffix for a replica.
|
# VM names may either be MASTER_NAME or MASTER_NAME with a suffix for a replica.
|
||||||
function get-replica-name-regexp() {
|
function get-replica-name-regexp() {
|
||||||
@ -1786,7 +1816,7 @@ function test-setup() {
|
|||||||
# Detect the project into $PROJECT if it isn't set
|
# Detect the project into $PROJECT if it isn't set
|
||||||
detect-project
|
detect-project
|
||||||
|
|
||||||
if [[ ${MULTIZONE:-} == "true" ]]; then
|
if [[ ${MULTIZONE:-} == "true" && -n ${E2E_ZONES:-} ]]; then
|
||||||
for KUBE_GCE_ZONE in ${E2E_ZONES}
|
for KUBE_GCE_ZONE in ${E2E_ZONES}
|
||||||
do
|
do
|
||||||
KUBE_GCE_ZONE="${KUBE_GCE_ZONE}" KUBE_USE_EXISTING_MASTER="${KUBE_USE_EXISTING_MASTER:-}" "${KUBE_ROOT}/cluster/kube-up.sh"
|
KUBE_GCE_ZONE="${KUBE_GCE_ZONE}" KUBE_USE_EXISTING_MASTER="${KUBE_USE_EXISTING_MASTER:-}" "${KUBE_ROOT}/cluster/kube-up.sh"
|
||||||
@ -1843,7 +1873,7 @@ function test-teardown() {
|
|||||||
delete-firewall-rules \
|
delete-firewall-rules \
|
||||||
"${NODE_TAG}-${INSTANCE_PREFIX}-http-alt" \
|
"${NODE_TAG}-${INSTANCE_PREFIX}-http-alt" \
|
||||||
"${NODE_TAG}-${INSTANCE_PREFIX}-nodeports"
|
"${NODE_TAG}-${INSTANCE_PREFIX}-nodeports"
|
||||||
if [[ ${MULTIZONE:-} == "true" ]]; then
|
if [[ ${MULTIZONE:-} == "true" && -n ${E2E_ZONES:-} ]]; then
|
||||||
local zones=( ${E2E_ZONES} )
|
local zones=( ${E2E_ZONES} )
|
||||||
# tear them down in reverse order, finally tearing down the master too.
|
# tear them down in reverse order, finally tearing down the master too.
|
||||||
for ((zone_num=${#zones[@]}-1; zone_num>0; zone_num--))
|
for ((zone_num=${#zones[@]}-1; zone_num>0; zone_num--))
|
||||||
|
@ -52,7 +52,12 @@ CLUSTER_READY_ADDITIONAL_TIME_SECONDS="${CLUSTER_READY_ADDITIONAL_TIME_SECONDS:-
|
|||||||
|
|
||||||
EXPECTED_NUM_NODES="${NUM_NODES}"
|
EXPECTED_NUM_NODES="${NUM_NODES}"
|
||||||
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
|
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
|
||||||
EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+1))
|
if [[ "${KUBERNETES_PROVIDER:-}" == "gce" ]]; then
|
||||||
|
NUM_MASTERS=$(get-master-replicas-count)
|
||||||
|
else
|
||||||
|
NUM_MASTERS=1
|
||||||
|
fi
|
||||||
|
EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+NUM_MASTERS))
|
||||||
fi
|
fi
|
||||||
REQUIRED_NUM_NODES=$((EXPECTED_NUM_NODES - ALLOWED_NOTREADY_NODES))
|
REQUIRED_NUM_NODES=$((EXPECTED_NUM_NODES - ALLOWED_NOTREADY_NODES))
|
||||||
# Make several attempts to deal with slow cluster birth.
|
# Make several attempts to deal with slow cluster birth.
|
||||||
|
@ -20,6 +20,7 @@ if [[ ! -z "${1:-}" ]]; then
|
|||||||
export KUBE_GCE_ZONE="${1}"
|
export KUBE_GCE_ZONE="${1}"
|
||||||
fi
|
fi
|
||||||
export KUBE_REPLICATE_EXISTING_MASTER=true
|
export KUBE_REPLICATE_EXISTING_MASTER=true
|
||||||
|
export MULTIZONE=true
|
||||||
|
|
||||||
source "${KUBE_ROOT}/hack/e2e-internal/e2e-up.sh"
|
source "${KUBE_ROOT}/hack/e2e-internal/e2e-up.sh"
|
||||||
|
|
||||||
|
@ -4011,6 +4011,43 @@ func WaitForClusterSize(c clientset.Interface, size int, timeout time.Duration)
|
|||||||
return fmt.Errorf("timeout waiting %v for cluster size to be %d", timeout, size)
|
return fmt.Errorf("timeout waiting %v for cluster size to be %d", timeout, size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WaitForMasters waits until the cluster has the desired number of ready masters in it.
|
||||||
|
func WaitForMasters(masterPrefix string, c clientset.Interface, size int, timeout time.Duration) error {
|
||||||
|
for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
|
||||||
|
nodes, err := c.Core().Nodes().List(api.ListOptions{})
|
||||||
|
if err != nil {
|
||||||
|
Logf("Failed to list nodes: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter out nodes that are not master replicas
|
||||||
|
FilterNodes(nodes, func(node api.Node) bool {
|
||||||
|
res, err := regexp.Match(masterPrefix+"(-...)?", ([]byte)(node.Name))
|
||||||
|
if err != nil {
|
||||||
|
Logf("Failed to match regexp to node name: %v", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
})
|
||||||
|
|
||||||
|
numNodes := len(nodes.Items)
|
||||||
|
|
||||||
|
// Filter out not-ready nodes.
|
||||||
|
FilterNodes(nodes, func(node api.Node) bool {
|
||||||
|
return IsNodeConditionSetAsExpected(&node, api.NodeReady, true)
|
||||||
|
})
|
||||||
|
|
||||||
|
numReady := len(nodes.Items)
|
||||||
|
|
||||||
|
if numNodes == size && numReady == size {
|
||||||
|
Logf("Cluster has reached the desired number of masters %d", size)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
Logf("Waiting for the number of masters %d, current %d, not ready master nodes %d", size, numNodes, numNodes-numReady)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("timeout waiting %v for the number of masters to be %d", timeout, size)
|
||||||
|
}
|
||||||
|
|
||||||
// GetHostExternalAddress gets the node for a pod and returns the first External
|
// GetHostExternalAddress gets the node for a pod and returns the first External
|
||||||
// address. Returns an error if the node the pod is on doesn't have an External
|
// address. Returns an error if the node the pod is on doesn't have an External
|
||||||
// address.
|
// address.
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo"
|
. "github.com/onsi/ginkgo"
|
||||||
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
|
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
|
||||||
@ -63,7 +64,6 @@ func createNewRC(c clientset.Interface, ns string, name string) {
|
|||||||
func verifyNumberOfMasterReplicas(expected int) {
|
func verifyNumberOfMasterReplicas(expected int) {
|
||||||
output, err := exec.Command("gcloud", "compute", "instances", "list",
|
output, err := exec.Command("gcloud", "compute", "instances", "list",
|
||||||
"--project="+framework.TestContext.CloudConfig.ProjectID,
|
"--project="+framework.TestContext.CloudConfig.ProjectID,
|
||||||
"--zones="+framework.TestContext.CloudConfig.Zone,
|
|
||||||
"--regexp="+framework.TestContext.CloudConfig.MasterName+"(-...)?",
|
"--regexp="+framework.TestContext.CloudConfig.MasterName+"(-...)?",
|
||||||
"--filter=status=RUNNING",
|
"--filter=status=RUNNING",
|
||||||
"--format=[no-heading]").CombinedOutput()
|
"--format=[no-heading]").CombinedOutput()
|
||||||
@ -73,7 +73,7 @@ func verifyNumberOfMasterReplicas(expected int) {
|
|||||||
replicas := bytes.Count(output, newline)
|
replicas := bytes.Count(output, newline)
|
||||||
framework.Logf("Num master replicas/expected: %d/%d", replicas, expected)
|
framework.Logf("Num master replicas/expected: %d/%d", replicas, expected)
|
||||||
if replicas != expected {
|
if replicas != expected {
|
||||||
framework.Failf("Wrong number of master replicas")
|
framework.Failf("Wrong number of master replicas %d expected %d", replicas, expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -131,6 +131,8 @@ var _ = framework.KubeDescribe("HA-master [Feature:HAMaster]", func() {
|
|||||||
for _, zone := range additionalReplicaZones {
|
for _, zone := range additionalReplicaZones {
|
||||||
removeMasterReplica(zone)
|
removeMasterReplica(zone)
|
||||||
}
|
}
|
||||||
|
framework.WaitForMasters(framework.TestContext.CloudConfig.MasterName, c, 1, 10*time.Minute)
|
||||||
|
verifyNumberOfMasterReplicas(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
type Action int
|
type Action int
|
||||||
@ -151,6 +153,7 @@ var _ = framework.KubeDescribe("HA-master [Feature:HAMaster]", func() {
|
|||||||
additionalReplicaZones = removeZoneFromZones(additionalReplicaZones, zone)
|
additionalReplicaZones = removeZoneFromZones(additionalReplicaZones, zone)
|
||||||
}
|
}
|
||||||
verifyNumberOfMasterReplicas(len(additionalReplicaZones) + 1)
|
verifyNumberOfMasterReplicas(len(additionalReplicaZones) + 1)
|
||||||
|
framework.WaitForMasters(framework.TestContext.CloudConfig.MasterName, c, len(additionalReplicaZones)+1, 10*time.Minute)
|
||||||
|
|
||||||
// Verify that API server works correctly with HA master.
|
// Verify that API server works correctly with HA master.
|
||||||
rcName := "ha-master-" + strconv.Itoa(len(existingRCs))
|
rcName := "ha-master-" + strconv.Itoa(len(existingRCs))
|
||||||
@ -159,16 +162,19 @@ var _ = framework.KubeDescribe("HA-master [Feature:HAMaster]", func() {
|
|||||||
verifyRCs(c, ns, existingRCs)
|
verifyRCs(c, ns, existingRCs)
|
||||||
}
|
}
|
||||||
|
|
||||||
It("pods survive addition/removal same zone [Slow]", func() {
|
It("survive addition/removal replicas same zone [Serial][Disruptive]", func() {
|
||||||
zone := framework.TestContext.CloudConfig.Zone
|
zone := framework.TestContext.CloudConfig.Zone
|
||||||
step(None, "")
|
step(None, "")
|
||||||
step(AddReplica, zone)
|
numAdditionalReplicas := 2
|
||||||
step(AddReplica, zone)
|
for i := 0; i < numAdditionalReplicas; i++ {
|
||||||
step(RemoveReplica, zone)
|
step(AddReplica, zone)
|
||||||
step(RemoveReplica, zone)
|
}
|
||||||
|
for i := 0; i < numAdditionalReplicas; i++ {
|
||||||
|
step(RemoveReplica, zone)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
It("pods survive addition/removal different zones [Slow]", func() {
|
It("survive addition/removal replicas different zones [Serial][Disruptive]", func() {
|
||||||
zone := framework.TestContext.CloudConfig.Zone
|
zone := framework.TestContext.CloudConfig.Zone
|
||||||
region := findRegionForZone(zone)
|
region := findRegionForZone(zone)
|
||||||
zones := findZonesForRegion(region)
|
zones := findZonesForRegion(region)
|
||||||
|
Loading…
Reference in New Issue
Block a user