Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-24 12:15:52 +00:00
Respect PDBs during GCE node upgrades.

Respect PDBs during node upgrades and add test coverage to the ServiceTest upgrade test. That test now includes pod anti-affinity constraints and a PDB.

parent 68dd748ba1
commit 775f2ef9a0
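The change below stops driving node upgrades through `gcloud alpha compute rolling-updates` and instead drains each node with kubectl before recreating it. Draining is what makes the PodDisruptionBudget matter: drain removes pods through the eviction subresource, and the API server refuses an eviction while the covering PDB currently allows no disruptions. The following sketch is illustration only, not code from this commit; the function, pod name, and client wiring are assumptions, though it reuses the same client and API packages the test changes import.

package evictionsketch

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	policyv1beta1 "k8s.io/kubernetes/pkg/apis/policy/v1beta1"
	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
)

// evictOnce asks the API server to evict a single pod through the eviction
// subresource, the same mechanism kubectl drain relies on. While the PDB that
// covers the pod reports disruptionsAllowed == 0, the request is refused, so a
// drain-based upgrade waits instead of deleting pods past the budget.
func evictOnce(c clientset.Interface, namespace, podName string) error {
	eviction := &policyv1beta1.Eviction{
		ObjectMeta: metav1.ObjectMeta{
			Name:      podName,
			Namespace: namespace,
		},
	}
	// Callers retry on error until another healthy replica makes the eviction safe.
	return c.Policy().Evictions(namespace).Evict(eviction)
}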
cluster/gce/upgrade.sh

@@ -343,51 +343,114 @@ function do-node-upgrade() {
         --zones="${ZONE}" \
         --regexp="${group}" \
         --format='value(instanceTemplate)' || true))
-    echo "== Calling rolling-update for ${group}. ==" >&2
-    update=$(gcloud alpha compute rolling-updates \
-        --project="${PROJECT}" \
-        --zone="${ZONE}" \
-        start \
-        --group="${group}" \
-        --template="${template_name}" \
-        --instance-startup-timeout=300s \
-        --max-num-concurrent-instances=1 \
-        --max-num-failed-instances=0 \
-        --min-instance-update-time=0s 2>&1) && update_rc=$? || update_rc=$?
-
-    if [[ "${update_rc}" != 0 ]]; then
-      echo "== FAILED to start rolling-update: =="
-      echo "${update}"
-      echo "  This may be due to a preexisting rolling-update;"
-      echo "  see https://github.com/kubernetes/kubernetes/issues/33113 for details."
-      echo "  All rolling-updates in project ${PROJECT} zone ${ZONE}:"
-      gcloud alpha compute rolling-updates \
-          --project="${PROJECT}" \
-          --zone="${ZONE}" \
-          list || true
-      return ${update_rc}
+    set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
+      --template="${template_name}" \
+      --project="${PROJECT}" \
+      --zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
+    if [[ "${set_instance_template_rc}" != 0 ]]; then
+      echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
+      echo "${set_instance_template_out}"
+      return ${set_instance_template_rc}
     fi

-    id=$(echo "${update}" | grep "Started" | cut -d '/' -f 11 | cut -d ']' -f 1)
-    updates+=("${id}")
-  done
-
-  echo "== Waiting for Upgrading nodes to be finished. ==" >&2
-  # Wait until rolling updates are finished.
-  for update in ${updates[@]}; do
-    while true; do
-      result=$(gcloud alpha compute rolling-updates \
-          --project="${PROJECT}" \
-          --zone="${ZONE}" \
-          describe \
-          ${update} \
-          --format='value(status)' || true)
-      if [ $result = "ROLLED_OUT" ]; then
-        echo "Rolling update ${update} is ${result} state and finished."
-        break
+    instances=()
+    instances+=($(gcloud compute instance-groups managed list-instances "${group}" \
+        --format='value(instance)' \
+        --project="${PROJECT}" \
+        --zone="${ZONE}" 2>&1)) && list_instances_rc=$? || list_instances_rc=$?
+    if [[ "${list_instances_rc}" != 0 ]]; then
+      echo "== FAILED to list instances in group ${group} =="
+      echo "${instances}"
+      return ${list_instances_rc}
+    fi
+    for instance in ${instances[@]}; do
+      # Cache instance id for later
+      instance_id=$(gcloud compute instances describe "${instance}" \
+        --format='get(id)' \
+        --project="${PROJECT}" \
+        --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+      if [[ "${describe_rc}" != 0 ]]; then
+        echo "== FAILED to describe ${instance} =="
+        echo "${instance_id}"
+        return ${describe_rc}
       fi
-      echo "Rolling update ${update} is still in ${result} state."
-      sleep 10
+
+      # Drain node
+      echo "== Draining ${instance}. == " >&2
+      "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
+        && drain_rc=$? || drain_rc=$?
+      if [[ "${drain_rc}" != 0 ]]; then
+        echo "== FAILED to drain ${instance} =="
+        return ${drain_rc}
+      fi
+
+      # Recreate instance
+      echo "== Recreating instance ${instance}. ==" >&2
+      recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
+        --project="${PROJECT}" \
+        --zone="${ZONE}" \
+        --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
+      if [[ "${recreate_rc}" != 0 ]]; then
+        echo "== FAILED to recreate ${instance} =="
+        echo "${recreate}"
+        return ${recreate_rc}
+      fi
+
+      # Wait for instance to be recreated
+      echo "== Waiting for instance ${instance} to be recreated. ==" >&2
+      while true; do
+        new_instance_id=$(gcloud compute instances describe "${instance}" \
+          --format='get(id)' \
+          --project="${PROJECT}" \
+          --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+        if [[ "${describe_rc}" != 0 ]]; then
+          echo "== FAILED to describe ${instance} =="
+          echo "${new_instance_id}"
+          echo "  (Will retry.)"
+        elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
+          echo -n .
+        else
+          echo "Instance ${instance} recreated."
+          break
+        fi
+        sleep 1
+      done
+
+      # Wait for k8s node object to reflect new instance id
+      echo "== Waiting for new node to be added to k8s. ==" >&2
+      while true; do
+        external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
+        if [[ "${kubectl_rc}" != 0 ]]; then
+          echo "== FAILED to get node ${instance} =="
+          echo "${external_id}"
+          echo "  (Will retry.)"
+        elif [[ "${external_id}" == "${new_instance_id}" ]]; then
+          echo "Node ${instance} recreated."
+          break
+        elif [[ "${external_id}" == "${instance_id}" ]]; then
+          echo -n .
+        else
+          echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
+          echo "  (Will retry.)"
+        fi
+        sleep 1
+      done
+
+      # Wait for the node to not have SchedulingDisabled=True and also to have
+      # Ready=True.
+      echo "== Waiting for ${instance} to become ready. ==" >&2
+      while true; do
+        cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
+        ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
+        if [[ "${cordoned}" == 'True' ]]; then
+          echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
+        elif [[ "${ready}" != 'True' ]]; then
+          echo "Node ${instance} is still not ready: Ready=${ready}"
+        else
+          echo "Node ${instance} Ready=${ready}"
+          break
+        fi
+        sleep 1
+      done
+    done
   done
test/e2e/framework/service_util.go

@@ -34,6 +34,7 @@ import (
     "k8s.io/apimachinery/pkg/util/uuid"
     "k8s.io/apimachinery/pkg/util/wait"
     "k8s.io/kubernetes/pkg/api/v1"
+    policyv1beta1 "k8s.io/kubernetes/pkg/apis/policy/v1beta1"
     "k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
     "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
     "k8s.io/kubernetes/pkg/client/retry"
@@ -510,6 +511,8 @@ func (j *ServiceTestJig) WaitForLoadBalancerDestroyOrFail(namespace, name string
 // this jig, but does not actually create the RC. The default RC has the same
 // name as the jig and runs the "netexec" container.
 func (j *ServiceTestJig) newRCTemplate(namespace string) *v1.ReplicationController {
+    var replicas int32 = 1
+
     rc := &v1.ReplicationController{
         ObjectMeta: metav1.ObjectMeta{
             Namespace: namespace,
@@ -517,7 +520,7 @@ func (j *ServiceTestJig) newRCTemplate(namespace string) *v1.ReplicationControll
             Labels:    j.Labels,
         },
         Spec: v1.ReplicationControllerSpec{
-            Replicas: func(i int) *int32 { x := int32(i); return &x }(1),
+            Replicas: &replicas,
             Selector: j.Labels,
             Template: &v1.PodTemplateSpec{
                 ObjectMeta: metav1.ObjectMeta{
@@ -548,6 +551,59 @@ func (j *ServiceTestJig) newRCTemplate(namespace string) *v1.ReplicationControll
     return rc
 }

+func (j *ServiceTestJig) AddRCAntiAffinity(rc *v1.ReplicationController) {
+    var replicas int32 = 2
+
+    rc.Spec.Replicas = &replicas
+    if rc.Spec.Template.Spec.Affinity == nil {
+        rc.Spec.Template.Spec.Affinity = &v1.Affinity{}
+    }
+    if rc.Spec.Template.Spec.Affinity.PodAntiAffinity == nil {
+        rc.Spec.Template.Spec.Affinity.PodAntiAffinity = &v1.PodAntiAffinity{}
+    }
+    rc.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(
+        rc.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution,
+        v1.PodAffinityTerm{
+            LabelSelector: &metav1.LabelSelector{MatchLabels: j.Labels},
+            Namespaces:    nil,
+            TopologyKey:   "kubernetes.io/hostname",
+        })
+}
+
+func (j *ServiceTestJig) CreatePDBOrFail(namespace string, rc *v1.ReplicationController) *policyv1beta1.PodDisruptionBudget {
+    pdb := j.newPDBTemplate(namespace, rc)
+    newPdb, err := j.Client.Policy().PodDisruptionBudgets(namespace).Create(pdb)
+    if err != nil {
+        Failf("Failed to create PDB %q %v", pdb.Name, err)
+    }
+    if err := j.waitForPdbReady(namespace); err != nil {
+        Failf("Failed waiting for PDB to be ready: %v", err)
+    }
+
+    return newPdb
+}
+
+// newPDBTemplate returns the default policyv1beta1.PodDisruptionBudget object for
+// this jig, but does not actually create the PDB. The default PDB specifies a
+// MinAvailable of N-1 and matches the pods created by the RC.
+func (j *ServiceTestJig) newPDBTemplate(namespace string, rc *v1.ReplicationController) *policyv1beta1.PodDisruptionBudget {
+    minAvailable := intstr.FromInt(int(*rc.Spec.Replicas) - 1)
+
+    pdb := &policyv1beta1.PodDisruptionBudget{
+        ObjectMeta: metav1.ObjectMeta{
+            Namespace: namespace,
+            Name:      j.Name,
+            Labels:    j.Labels,
+        },
+        Spec: policyv1beta1.PodDisruptionBudgetSpec{
+            MinAvailable: &minAvailable,
+            Selector:     &metav1.LabelSelector{MatchLabels: j.Labels},
+        },
+    }
+
+    return pdb
+}
+
 // RunOrFail creates a ReplicationController and Pod(s) and waits for the
 // Pod(s) to be running. Callers can provide a function to tweak the RC object
 // before it is created.
@@ -558,7 +614,7 @@ func (j *ServiceTestJig) RunOrFail(namespace string, tweak func(rc *v1.Replicati
     }
     result, err := j.Client.Core().ReplicationControllers(namespace).Create(rc)
     if err != nil {
-        Failf("Failed to created RC %q: %v", rc.Name, err)
+        Failf("Failed to create RC %q: %v", rc.Name, err)
     }
     pods, err := j.waitForPodsCreated(namespace, int(*(rc.Spec.Replicas)))
     if err != nil {
@@ -570,6 +626,21 @@ func (j *ServiceTestJig) RunOrFail(namespace string, tweak func(rc *v1.Replicati
     return result
 }

+func (j *ServiceTestJig) waitForPdbReady(namespace string) error {
+    timeout := 2 * time.Minute
+    for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
+        pdb, err := j.Client.Policy().PodDisruptionBudgets(namespace).Get(j.Name, metav1.GetOptions{})
+        if err != nil {
+            return err
+        }
+        if pdb.Status.PodDisruptionsAllowed > 0 {
+            return nil
+        }
+    }
+
+    return fmt.Errorf("Timeout waiting for PDB %q to be ready", j.Name)
+}
+
 func (j *ServiceTestJig) waitForPodsCreated(namespace string, replicas int) ([]string, error) {
     timeout := 2 * time.Minute
     // List the pods, making sure we observe all the replicas.
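Taken together, the new jig helpers are meant to compose: run the ReplicationController with anti-affinity so the two replicas land on different nodes, then cover them with the PDB before nodes start being drained. A condensed, hypothetical wrapper follows (the function name and wiring are assumptions; the real call sites, including the shouldTestPDBs provider check, are in the test/e2e/upgrades/services.go hunks below):

package upgradesketch

import (
	policyv1beta1 "k8s.io/kubernetes/pkg/apis/policy/v1beta1"
	"k8s.io/kubernetes/test/e2e/framework"
)

// setupDisruptionCoverage spreads the service's pods across nodes and covers
// them with a PodDisruptionBudget of MinAvailable = replicas-1, so a node
// drain during the upgrade may evict at most one of them at a time.
func setupDisruptionCoverage(jig *framework.ServiceTestJig, namespace string) *policyv1beta1.PodDisruptionBudget {
	rc := jig.RunOrFail(namespace, jig.AddRCAntiAffinity) // 2 replicas, required anti-affinity on kubernetes.io/hostname
	return jig.CreatePDBOrFail(namespace, rc)
}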
test/e2e/upgrades/services.go

@@ -36,6 +36,8 @@ type ServiceUpgradeTest struct {

 func (ServiceUpgradeTest) Name() string { return "service-upgrade" }

+func shouldTestPDBs() bool { return framework.ProviderIs("gce", "gke") }
+
 // Setup creates a service with a load balancer and makes sure it's reachable.
 func (t *ServiceUpgradeTest) Setup(f *framework.Framework) {
     serviceName := "service-test"
@@ -55,7 +57,12 @@ func (t *ServiceUpgradeTest) Setup(f *framework.Framework) {
     svcPort := int(tcpService.Spec.Ports[0].Port)

     By("creating pod to be part of service " + serviceName)
-    jig.RunOrFail(ns.Name, nil)
+    rc := jig.RunOrFail(ns.Name, jig.AddRCAntiAffinity)
+
+    if shouldTestPDBs() {
+        By("creating a PodDisruptionBudget to cover the ReplicationController")
+        jig.CreatePDBOrFail(ns.Name, rc)
+    }

     // Hit it once before considering ourselves ready
     By("hitting the pod through the service's LoadBalancer")
@@ -72,6 +79,9 @@ func (t *ServiceUpgradeTest) Test(f *framework.Framework, done <-chan struct{},
     switch upgrade {
     case MasterUpgrade:
         t.test(f, done, true)
+    case NodeUpgrade:
+        // Node upgrades should test during disruption only on GCE/GKE for now.
+        t.test(f, done, shouldTestPDBs())
     default:
         t.test(f, done, false)
     }
@@ -87,7 +97,7 @@ func (t *ServiceUpgradeTest) test(f *framework.Framework, done <-chan struct{},
         // Continuous validation
         By("continuously hitting the pod through the service's LoadBalancer")
         wait.Until(func() {
-            t.jig.TestReachableHTTP(t.tcpIngressIP, t.svcPort, framework.Poll)
+            t.jig.TestReachableHTTP(t.tcpIngressIP, t.svcPort, framework.LoadBalancerLagTimeoutDefault)
         }, framework.Poll, done)
     } else {
         // Block until upgrade is done