Merge pull request #103414 from ravisantoshgudimetla/fix-pdb-status

[disruptioncontroller] Don't error for unmanaged pods
This commit is contained in:
Kubernetes Prow Robot 2021-07-07 12:40:35 -07:00 committed by GitHub
commit ac6a1b1821
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 105 additions and 10 deletions

View File

@ -595,11 +595,16 @@ func (dc *DisruptionController) trySync(pdb *policy.PodDisruptionBudget) error {
dc.recorder.Eventf(pdb, v1.EventTypeNormal, "NoPods", "No matching pods found")
}
expectedCount, desiredHealthy, err := dc.getExpectedPodCount(pdb, pods)
expectedCount, desiredHealthy, unmanagedPods, err := dc.getExpectedPodCount(pdb, pods)
if err != nil {
dc.recorder.Eventf(pdb, v1.EventTypeWarning, "CalculateExpectedPodCountFailed", "Failed to calculate the number of expected pods: %v", err)
return err
}
// We have unmanaged pods; instead of erroring and hotlooping in disruption controller, log and continue.
if len(unmanagedPods) > 0 {
klog.V(4).Infof("found unmanaged pods associated with this PDB: %v",
strings.Join(unmanagedPods, ",'"))
}
currentTime := time.Now()
disruptedPods, recheckTime := dc.buildDisruptedPodMap(pods, pdb, currentTime)
@ -615,7 +620,7 @@ func (dc *DisruptionController) trySync(pdb *policy.PodDisruptionBudget) error {
return err
}
func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount, desiredHealthy int32, err error) {
func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount, desiredHealthy int32, unmanagedPods []string, err error) {
err = nil
// TODO(davidopp): consider making the way expectedCount and rules about
// permitted controller configurations (specifically, considering it an error
@ -623,7 +628,7 @@ func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBud
// handled the same way for integer and percentage minAvailable
if pdb.Spec.MaxUnavailable != nil {
expectedCount, err = dc.getExpectedScale(pdb, pods)
expectedCount, unmanagedPods, err = dc.getExpectedScale(pdb, pods)
if err != nil {
return
}
@ -641,7 +646,7 @@ func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBud
desiredHealthy = pdb.Spec.MinAvailable.IntVal
expectedCount = int32(len(pods))
} else if pdb.Spec.MinAvailable.Type == intstr.String {
expectedCount, err = dc.getExpectedScale(pdb, pods)
expectedCount, unmanagedPods, err = dc.getExpectedScale(pdb, pods)
if err != nil {
return
}
@ -657,7 +662,7 @@ func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBud
return
}
func (dc *DisruptionController) getExpectedScale(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount int32, err error) {
func (dc *DisruptionController) getExpectedScale(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount int32, unmanagedPods []string, err error) {
// When the user specifies a fraction of pods that must be available, we
// use as the fraction's denominator
// SUM_{all c in C} scale(c)
@ -672,13 +677,19 @@ func (dc *DisruptionController) getExpectedScale(pdb *policy.PodDisruptionBudget
// A mapping from controllers to their scale.
controllerScale := map[types.UID]int32{}
// 1. Find the controller for each pod. If any pod has 0 controllers,
// that's an error. With ControllerRef, a pod can only have 1 controller.
// 1. Find the controller for each pod.
// As of now, we allow PDBs to be applied to pods via selectors, so there
// can be unmanaged pods(pods that don't have backing controllers) but still have PDBs associated.
// Such pods are to be collected and PDB backing them should be enqueued instead of immediately throwing
// a sync error. This ensures disruption controller is not frequently updating the status subresource and thus
// preventing excessive and expensive writes to etcd.
// With ControllerRef, a pod can only have 1 controller.
for _, pod := range pods {
controllerRef := metav1.GetControllerOf(pod)
if controllerRef == nil {
err = fmt.Errorf("found no controller ref for pod %q", pod.Name)
return
unmanagedPods = append(unmanagedPods, pod.Name)
continue
}
// If we already know the scale of the controller there is no need to do anything.

View File

@ -22,6 +22,7 @@ import (
"fmt"
"os"
"runtime/debug"
"strings"
"sync"
"testing"
"time"
@ -115,6 +116,15 @@ func (ps *pdbStates) VerifyDisruptionAllowed(t *testing.T, key string, disruptio
}
}
// VerifyNoStatusError asserts that the PDB identified by key carries no
// status condition reporting a sync failure caused by unmanaged pods
// (i.e. a "found no controller ref" SyncFailed condition).
func (ps *pdbStates) VerifyNoStatusError(t *testing.T, key string) {
	pdb := ps.Get(key)
	for _, cond := range pdb.Status.Conditions {
		syncFailed := cond.Reason == policy.SyncFailedReason
		noControllerRef := strings.Contains(cond.Message, "found no controller ref")
		if noControllerRef && syncFailed {
			t.Fatalf("PodDisruption Controller should not error when unmanaged pods are found but it failed for %q", key)
		}
	}
}
type disruptionController struct {
*DisruptionController
@ -534,6 +544,47 @@ func TestNakedPod(t *testing.T) {
ps.VerifyDisruptionAllowed(t, pdbName, 0)
}
// TestStatusForUnmanagedPod verifies that the disruption controller does not
// record a sync error in the PDB status when an unmanaged pod (one without a
// controller ref) matches the PDB's selector.
func TestStatusForUnmanagedPod(t *testing.T) {
	dc, ps := newFakeDisruptionController()

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	dc.sync(pdbName)

	// With no pods behind the PDB, no disruptions may be allowed.
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	// Introduce a pod with no controller ref and resync; the controller
	// should tolerate it rather than surface a sync failure in status.
	unmanagedPod, _ := newPod(t, "unmanaged")
	add(t, dc.podStore, unmanagedPod)
	dc.sync(pdbName)
	ps.VerifyNoStatusError(t, pdbName)
}
// TestTotalUnmanagedPods checks that getExpectedScale collects every pod
// lacking a controller ref into the returned unmanaged-pods list, and that
// the controller does not record a sync error for such pods.
func TestTotalUnmanagedPods(t *testing.T) {
	dc, ps := newFakeDisruptionController()

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	dc.sync(pdbName)

	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	pod, _ := newPod(t, "unmanaged")
	add(t, dc.podStore, pod)
	dc.sync(pdbName)

	pods := []*v1.Pod{pod}
	// Don't discard the error: a failure here would otherwise masquerade
	// as "zero unmanaged pods" and pass the length check below vacuously.
	_, unmanagedPods, err := dc.getExpectedScale(pdb, pods)
	if err != nil {
		t.Fatalf("unexpected error from getExpectedScale: %v", err)
	}
	if len(unmanagedPods) != 1 {
		t.Fatalf("expected one pod to be unmanaged pod but found %d", len(unmanagedPods))
	}
	ps.VerifyNoStatusError(t, pdbName)
}
// Verify that we count the scale of a ReplicaSet even when it has no Deployment.
func TestReplicaSet(t *testing.T) {
dc, ps := newFakeDisruptionController()
@ -752,7 +803,7 @@ func TestReplicationController(t *testing.T) {
rogue, _ := newPod(t, "rogue")
add(t, dc.podStore, rogue)
dc.sync(pdbName)
ps.VerifyDisruptionAllowed(t, pdbName, 0)
ps.VerifyDisruptionAllowed(t, pdbName, 2)
}
func TestStatefulSetController(t *testing.T) {

View File

@ -19,6 +19,8 @@ package apps
import (
"context"
"fmt"
"github.com/onsi/gomega"
"strings"
"time"
jsonpatch "github.com/evanphx/json-patch"
@ -185,6 +187,25 @@ var _ = SIGDescribe("DisruptionController", func() {
framework.ExpectEmpty(patched.Status.DisruptedPods, "Expecting the PodDisruptionBudget's be empty")
})
// PDB shouldn't error out when there are unmanaged pods
ginkgo.It("should observe that the PodDisruptionBudget status is not updated for unmanaged pods",
	func() {
		createPDBMinAvailableOrDie(cs, ns, defaultName, intstr.FromInt(1), defaultLabels)
		createPodsOrDie(cs, ns, 3)
		waitForPodsOrDie(cs, ns, 3)

		// Unmanaged pods are allowed to be associated with a PDB, so the
		// disruption controller must never report a sync failure for them;
		// poll for a minute to make sure no such condition ever appears.
		gomega.Consistently(func() (bool, error) {
			pdb, err := cs.PolicyV1().PodDisruptionBudgets(ns).Get(context.TODO(), defaultName, metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			return isPDBErroring(pdb), nil
		}, 1*time.Minute, 1*time.Second).ShouldNot(gomega.BeTrue(),
			// Name the right actor in the failure message: it is the PDB
			// controller sync, not the pod, that must not error.
			"pod disruption budget shouldn't error for unmanaged pods")
	})
evictionCases := []struct {
description string
minAvailable intstr.IntOrString
@ -677,3 +698,15 @@ func unstructuredToPDB(obj *unstructured.Unstructured) (*policyv1.PodDisruptionB
pdb.APIVersion = ""
return pdb, err
}
// isPDBErroring reports whether the PDB's status carries a SyncFailed
// condition caused by pods that have no controller ref (unmanaged pods).
func isPDBErroring(pdb *policyv1.PodDisruptionBudget) bool {
	for _, cond := range pdb.Status.Conditions {
		if !strings.Contains(cond.Reason, "SyncFailed") {
			continue
		}
		if strings.Contains(cond.Message, "found no controller ref for pod") {
			return true
		}
	}
	return false
}