Respect PDBs during GCE node upgrades.

Respect PDBs during node upgrades and add test coverage to the ServiceTest
upgrade test. Modify that test to include pod anti-affinity constraints and a
PDB.

parent 68dd748ba1
commit 775f2ef9a0
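The test-side change boils down to two API objects: a two-replica RC spread across nodes by required pod anti-affinity, and a PodDisruptionBudget with minAvailable = replicas-1. Below is a rough standalone sketch of just those objects, not part of the commit; the import paths mirror the ones used in this diff, and the "service-test" name and labels are placeholders.

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/intstr"
    "k8s.io/kubernetes/pkg/api/v1"
    policyv1beta1 "k8s.io/kubernetes/pkg/apis/policy/v1beta1"
)

func main() {
    labels := map[string]string{"testid": "service-test"} // placeholder labels

    // Required pod anti-affinity keyed on the hostname topology: no two pods
    // of this set may land on the same node.
    antiAffinity := &v1.PodAntiAffinity{
        RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
            LabelSelector: &metav1.LabelSelector{MatchLabels: labels},
            TopologyKey:   "kubernetes.io/hostname",
        }},
    }

    // PDB with minAvailable = replicas-1: while one pod is evicted during a
    // drain, further evictions are blocked until it is running again elsewhere.
    replicas := int32(2)
    minAvailable := intstr.FromInt(int(replicas) - 1)
    pdb := &policyv1beta1.PodDisruptionBudget{
        ObjectMeta: metav1.ObjectMeta{Name: "service-test", Labels: labels},
        Spec: policyv1beta1.PodDisruptionBudgetSpec{
            MinAvailable: &minAvailable,
            Selector:     &metav1.LabelSelector{MatchLabels: labels},
        },
    }

    fmt.Printf("anti-affinity terms: %d, minAvailable: %s\n",
        len(antiAffinity.RequiredDuringSchedulingIgnoredDuringExecution),
        pdb.Spec.MinAvailable.String())
}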
@@ -343,51 +343,114 @@ function do-node-upgrade() {
         --zones="${ZONE}" \
         --regexp="${group}" \
         --format='value(instanceTemplate)' || true))
-    echo "== Calling rolling-update for ${group}. ==" >&2
-    update=$(gcloud alpha compute rolling-updates \
-        --project="${PROJECT}" \
-        --zone="${ZONE}" \
-        start \
-        --group="${group}" \
-        --template="${template_name}" \
-        --instance-startup-timeout=300s \
-        --max-num-concurrent-instances=1 \
-        --max-num-failed-instances=0 \
-        --min-instance-update-time=0s 2>&1) && update_rc=$? || update_rc=$?
-
-    if [[ "${update_rc}" != 0 ]]; then
-      echo "== FAILED to start rolling-update: =="
-      echo "${update}"
-      echo " This may be due to a preexisting rolling-update;"
-      echo " see https://github.com/kubernetes/kubernetes/issues/33113 for details."
-      echo " All rolling-updates in project ${PROJECT} zone ${ZONE}:"
-      gcloud alpha compute rolling-updates \
-          --project="${PROJECT}" \
-          --zone="${ZONE}" \
-          list || true
-      return ${update_rc}
+    set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
+      --template="${template_name}" \
+      --project="${PROJECT}" \
+      --zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
+    if [[ "${set_instance_template_rc}" != 0 ]]; then
+      echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
+      echo "${set_instance_template_out}"
+      return ${set_instance_template_rc}
     fi
-    id=$(echo "${update}" | grep "Started" | cut -d '/' -f 11 | cut -d ']' -f 1)
-    updates+=("${id}")
-  done
-
-  echo "== Waiting for Upgrading nodes to be finished. ==" >&2
-  # Wait until rolling updates are finished.
-  for update in ${updates[@]}; do
-    while true; do
-      result=$(gcloud alpha compute rolling-updates \
-          --project="${PROJECT}" \
-          --zone="${ZONE}" \
-          describe \
-          ${update} \
-          --format='value(status)' || true)
-      if [ $result = "ROLLED_OUT" ]; then
-        echo "Rolling update ${update} is ${result} state and finished."
-        break
-      fi
-      echo "Rolling update ${update} is still in ${result} state."
-      sleep 10
+    instances=()
+    instances+=($(gcloud compute instance-groups managed list-instances "${group}" \
+      --format='value(instance)' \
+      --project="${PROJECT}" \
+      --zone="${ZONE}" 2>&1)) && list_instances_rc=$? || list_instances_rc=$?
+    if [[ "${list_instances_rc}" != 0 ]]; then
+      echo "== FAILED to list instances in group ${group} =="
+      echo "${instances}"
+      return ${list_instances_rc}
+    fi
+    for instance in ${instances[@]}; do
+      # Cache instance id for later
+      instance_id=$(gcloud compute instances describe "${instance}" \
+        --format='get(id)' \
+        --project="${PROJECT}" \
+        --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+      if [[ "${describe_rc}" != 0 ]]; then
+        echo "== FAILED to describe ${instance} =="
+        echo "${instance_id}"
+        return ${describe_rc}
+      fi
+
+      # Drain node
+      echo "== Draining ${instance}. == " >&2
+      "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
+        && drain_rc=$? || drain_rc=$?
+      if [[ "${drain_rc}" != 0 ]]; then
+        echo "== FAILED to drain ${instance} =="
+        return ${drain_rc}
+      fi
+
+      # Recreate instance
+      echo "== Recreating instance ${instance}. ==" >&2
+      recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
+        --project="${PROJECT}" \
+        --zone="${ZONE}" \
+        --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
+      if [[ "${recreate_rc}" != 0 ]]; then
+        echo "== FAILED to recreate ${instance} =="
+        echo "${recreate}"
+        return ${recreate_rc}
+      fi
+
+      # Wait for instance to be recreated
+      echo "== Waiting for instance ${instance} to be recreated. ==" >&2
+      while true; do
+        new_instance_id=$(gcloud compute instances describe "${instance}" \
+          --format='get(id)' \
+          --project="${PROJECT}" \
+          --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+        if [[ "${describe_rc}" != 0 ]]; then
+          echo "== FAILED to describe ${instance} =="
+          echo "${new_instance_id}"
+          echo " (Will retry.)"
+        elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
+          echo -n .
+        else
+          echo "Instance ${instance} recreated."
+          break
+        fi
+        sleep 1
+      done
+
+      # Wait for k8s node object to reflect new instance id
+      echo "== Waiting for new node to be added to k8s. ==" >&2
+      while true; do
+        external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
+        if [[ "${kubectl_rc}" != 0 ]]; then
+          echo "== FAILED to get node ${instance} =="
+          echo "${external_id}"
+          echo " (Will retry.)"
+        elif [[ "${external_id}" == "${new_instance_id}" ]]; then
+          echo "Node ${instance} recreated."
+          break
+        elif [[ "${external_id}" == "${instance_id}" ]]; then
+          echo -n .
+        else
+          echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
+          echo " (Will retry.)"
+        fi
+        sleep 1
+      done
+
+      # Wait for the node to not have SchedulingDisabled=True and also to have
+      # Ready=True.
+      echo "== Waiting for ${instance} to become ready. ==" >&2
+      while true; do
+        cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
+        ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
+        if [[ "${cordoned}" == 'True' ]]; then
+          echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
+        elif [[ "${ready}" != 'True' ]]; then
+          echo "Node ${instance} is still not ready: Ready=${ready}"
+        else
+          echo "Node ${instance} Ready=${ready}"
+          break
+        fi
+        sleep 1
+      done
     done
   done
@@ -34,6 +34,7 @@ import (
     "k8s.io/apimachinery/pkg/util/uuid"
     "k8s.io/apimachinery/pkg/util/wait"
     "k8s.io/kubernetes/pkg/api/v1"
+    policyv1beta1 "k8s.io/kubernetes/pkg/apis/policy/v1beta1"
     "k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
     "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
     "k8s.io/kubernetes/pkg/client/retry"
@@ -510,6 +511,8 @@ func (j *ServiceTestJig) WaitForLoadBalancerDestroyOrFail(namespace, name string
 // this jig, but does not actually create the RC. The default RC has the same
 // name as the jig and runs the "netexec" container.
 func (j *ServiceTestJig) newRCTemplate(namespace string) *v1.ReplicationController {
+    var replicas int32 = 1
+
     rc := &v1.ReplicationController{
         ObjectMeta: metav1.ObjectMeta{
             Namespace: namespace,
@@ -517,7 +520,7 @@ func (j *ServiceTestJig) newRCTemplate(namespace string) *v1.ReplicationControll
             Labels:    j.Labels,
         },
         Spec: v1.ReplicationControllerSpec{
-            Replicas: func(i int) *int32 { x := int32(i); return &x }(1),
+            Replicas: &replicas,
             Selector: j.Labels,
             Template: &v1.PodTemplateSpec{
                 ObjectMeta: metav1.ObjectMeta{
@@ -548,6 +551,59 @@ func (j *ServiceTestJig) newRCTemplate(namespace string) *v1.ReplicationControll
     return rc
 }
 
+func (j *ServiceTestJig) AddRCAntiAffinity(rc *v1.ReplicationController) {
+    var replicas int32 = 2
+
+    rc.Spec.Replicas = &replicas
+    if rc.Spec.Template.Spec.Affinity == nil {
+        rc.Spec.Template.Spec.Affinity = &v1.Affinity{}
+    }
+    if rc.Spec.Template.Spec.Affinity.PodAntiAffinity == nil {
+        rc.Spec.Template.Spec.Affinity.PodAntiAffinity = &v1.PodAntiAffinity{}
+    }
+    rc.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(
+        rc.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution,
+        v1.PodAffinityTerm{
+            LabelSelector: &metav1.LabelSelector{MatchLabels: j.Labels},
+            Namespaces:    nil,
+            TopologyKey:   "kubernetes.io/hostname",
+        })
+}
+
+func (j *ServiceTestJig) CreatePDBOrFail(namespace string, rc *v1.ReplicationController) *policyv1beta1.PodDisruptionBudget {
+    pdb := j.newPDBTemplate(namespace, rc)
+    newPdb, err := j.Client.Policy().PodDisruptionBudgets(namespace).Create(pdb)
+    if err != nil {
+        Failf("Failed to create PDB %q %v", pdb.Name, err)
+    }
+    if err := j.waitForPdbReady(namespace); err != nil {
+        Failf("Failed waiting for PDB to be ready: %v", err)
+    }
+
+    return newPdb
+}
+
+// newPDBTemplate returns the default policyv1beta1.PodDisruptionBudget object for
+// this jig, but does not actually create the PDB. The default PDB specifies a
+// MinAvailable of N-1 and matches the pods created by the RC.
+func (j *ServiceTestJig) newPDBTemplate(namespace string, rc *v1.ReplicationController) *policyv1beta1.PodDisruptionBudget {
+    minAvailable := intstr.FromInt(int(*rc.Spec.Replicas) - 1)
+
+    pdb := &policyv1beta1.PodDisruptionBudget{
+        ObjectMeta: metav1.ObjectMeta{
+            Namespace: namespace,
+            Name:      j.Name,
+            Labels:    j.Labels,
+        },
+        Spec: policyv1beta1.PodDisruptionBudgetSpec{
+            MinAvailable: &minAvailable,
+            Selector:     &metav1.LabelSelector{MatchLabels: j.Labels},
+        },
+    }
+
+    return pdb
+}
+
 // RunOrFail creates a ReplicationController and Pod(s) and waits for the
 // Pod(s) to be running. Callers can provide a function to tweak the RC object
 // before it is created.
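A hedged usage sketch of the helpers introduced above, assuming ServiceTestJig is exported from the e2e framework package; the commit's actual wiring is the ServiceUpgradeTest.Setup hunk further down.

package upgrades // hypothetical placement, mirroring the e2e upgrades tests

import "k8s.io/kubernetes/test/e2e/framework" // assumed home of ServiceTestJig

// setupWithDisruptionCoverage is illustrative only, not part of the commit.
func setupWithDisruptionCoverage(jig *framework.ServiceTestJig, ns string) {
    // Two replicas, forced onto distinct nodes by required pod anti-affinity.
    rc := jig.RunOrFail(ns, jig.AddRCAntiAffinity)
    // minAvailable = replicas-1, so a drain may take down at most one pod at a
    // time and must wait for it to be rescheduled before the next node goes.
    jig.CreatePDBOrFail(ns, rc)
}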
@@ -558,7 +614,7 @@ func (j *ServiceTestJig) RunOrFail(namespace string, tweak func(rc *v1.Replicati
     }
     result, err := j.Client.Core().ReplicationControllers(namespace).Create(rc)
     if err != nil {
-        Failf("Failed to created RC %q: %v", rc.Name, err)
+        Failf("Failed to create RC %q: %v", rc.Name, err)
     }
     pods, err := j.waitForPodsCreated(namespace, int(*(rc.Spec.Replicas)))
     if err != nil {
@@ -570,6 +626,21 @@ func (j *ServiceTestJig) RunOrFail(namespace string, tweak func(rc *v1.Replicati
     return result
 }
 
+func (j *ServiceTestJig) waitForPdbReady(namespace string) error {
+    timeout := 2 * time.Minute
+    for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
+        pdb, err := j.Client.Policy().PodDisruptionBudgets(namespace).Get(j.Name, metav1.GetOptions{})
+        if err != nil {
+            return err
+        }
+        if pdb.Status.PodDisruptionsAllowed > 0 {
+            return nil
+        }
+    }
+
+    return fmt.Errorf("Timeout waiting for PDB %q to be ready", j.Name)
+}
+
 func (j *ServiceTestJig) waitForPodsCreated(namespace string, replicas int) ([]string, error) {
     timeout := 2 * time.Minute
     // List the pods, making sure we observe all the replicas.
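waitForPdbReady blocks until the PDB controller reports at least one allowed disruption, which is the signal the eviction API (and therefore the kubectl drain in the upgrade script above) checks before letting a pod go. For comparison, a hypothetical equivalent written against the wait helpers this file already imports:

// Not part of the commit; relies on the file's existing imports (time, wait,
// metav1) and on the exported Client and Name fields of ServiceTestJig.
func (j *ServiceTestJig) pdbReady(namespace string) error {
    return wait.Poll(2*time.Second, 2*time.Minute, func() (bool, error) {
        pdb, err := j.Client.Policy().PodDisruptionBudgets(namespace).Get(j.Name, metav1.GetOptions{})
        if err != nil {
            return false, err
        }
        return pdb.Status.PodDisruptionsAllowed > 0, nil
    })
}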
@@ -36,6 +36,8 @@ type ServiceUpgradeTest struct {
 
 func (ServiceUpgradeTest) Name() string { return "service-upgrade" }
 
+func shouldTestPDBs() bool { return framework.ProviderIs("gce", "gke") }
+
 // Setup creates a service with a load balancer and makes sure it's reachable.
 func (t *ServiceUpgradeTest) Setup(f *framework.Framework) {
     serviceName := "service-test"
@@ -55,7 +57,12 @@ func (t *ServiceUpgradeTest) Setup(f *framework.Framework) {
     svcPort := int(tcpService.Spec.Ports[0].Port)
 
     By("creating pod to be part of service " + serviceName)
-    jig.RunOrFail(ns.Name, nil)
+    rc := jig.RunOrFail(ns.Name, jig.AddRCAntiAffinity)
+
+    if shouldTestPDBs() {
+        By("creating a PodDisruptionBudget to cover the ReplicationController")
+        jig.CreatePDBOrFail(ns.Name, rc)
+    }
 
     // Hit it once before considering ourselves ready
     By("hitting the pod through the service's LoadBalancer")
@@ -72,6 +79,9 @@ func (t *ServiceUpgradeTest) Test(f *framework.Framework, done <-chan struct{},
     switch upgrade {
     case MasterUpgrade:
         t.test(f, done, true)
+    case NodeUpgrade:
+        // Node upgrades should test during disruption only on GCE/GKE for now.
+        t.test(f, done, shouldTestPDBs())
     default:
         t.test(f, done, false)
     }
@@ -87,7 +97,7 @@ func (t *ServiceUpgradeTest) test(f *framework.Framework, done <-chan struct{},
         // Continuous validation
         By("continuously hitting the pod through the service's LoadBalancer")
         wait.Until(func() {
-            t.jig.TestReachableHTTP(t.tcpIngressIP, t.svcPort, framework.Poll)
+            t.jig.TestReachableHTTP(t.tcpIngressIP, t.svcPort, framework.LoadBalancerLagTimeoutDefault)
         }, framework.Poll, done)
     } else {
         // Block until upgrade is done