Merge pull request #103414 from ravisantoshgudimetla/fix-pdb-status
[disruptioncontroller] Don't error for unmanaged pods
commit ac6a1b1821
@@ -595,11 +595,16 @@ func (dc *DisruptionController) trySync(pdb *policy.PodDisruptionBudget) error {
 		dc.recorder.Eventf(pdb, v1.EventTypeNormal, "NoPods", "No matching pods found")
 	}
 
-	expectedCount, desiredHealthy, err := dc.getExpectedPodCount(pdb, pods)
+	expectedCount, desiredHealthy, unmanagedPods, err := dc.getExpectedPodCount(pdb, pods)
 	if err != nil {
 		dc.recorder.Eventf(pdb, v1.EventTypeWarning, "CalculateExpectedPodCountFailed", "Failed to calculate the number of expected pods: %v", err)
 		return err
 	}
+	// We have unmanaged pods; instead of erroring and hotlooping in the disruption controller, log and continue.
+	if len(unmanagedPods) > 0 {
+		klog.V(4).Infof("found unmanaged pods associated with this PDB: %v",
+			strings.Join(unmanagedPods, ","))
+	}
 
 	currentTime := time.Now()
 	disruptedPods, recheckTime := dc.buildDisruptedPodMap(pods, pdb, currentTime)
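For illustration only, a minimal runnable sketch of the message the new klog.V(4) call would emit; the pod names here are hypothetical and not part of this change:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical names of selector-matched pods that have no controller.
	unmanagedPods := []string{"debug-pod", "adhoc-runner"}
	// Mirrors the format string used by the klog.V(4).Infof call above.
	fmt.Printf("found unmanaged pods associated with this PDB: %v\n",
		strings.Join(unmanagedPods, ","))
	// Output: found unmanaged pods associated with this PDB: debug-pod,adhoc-runner
}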
@@ -615,7 +620,7 @@ func (dc *DisruptionController) trySync(pdb *policy.PodDisruptionBudget) error {
 	return err
 }
 
-func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount, desiredHealthy int32, err error) {
+func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount, desiredHealthy int32, unmanagedPods []string, err error) {
 	err = nil
 	// TODO(davidopp): consider making the way expectedCount and rules about
 	// permitted controller configurations (specifically, considering it an error
@@ -623,7 +628,7 @@ func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBud
 	// handled the same way for integer and percentage minAvailable
 
 	if pdb.Spec.MaxUnavailable != nil {
-		expectedCount, err = dc.getExpectedScale(pdb, pods)
+		expectedCount, unmanagedPods, err = dc.getExpectedScale(pdb, pods)
 		if err != nil {
 			return
 		}
@@ -641,7 +646,7 @@ func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBud
 			desiredHealthy = pdb.Spec.MinAvailable.IntVal
 			expectedCount = int32(len(pods))
 		} else if pdb.Spec.MinAvailable.Type == intstr.String {
-			expectedCount, err = dc.getExpectedScale(pdb, pods)
+			expectedCount, unmanagedPods, err = dc.getExpectedScale(pdb, pods)
 			if err != nil {
 				return
 			}
@@ -657,7 +662,7 @@ func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBud
 	return
 }
 
-func (dc *DisruptionController) getExpectedScale(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount int32, err error) {
+func (dc *DisruptionController) getExpectedScale(pdb *policy.PodDisruptionBudget, pods []*v1.Pod) (expectedCount int32, unmanagedPods []string, err error) {
 	// When the user specifies a fraction of pods that must be available, we
 	// use as the fraction's denominator
 	// SUM_{all c in C} scale(c)
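As a rough worked example of the denominator described in the comment above: with minAvailable set to a percentage, expectedCount is the sum of the controllers' scales and desiredHealthy is that percentage of it. The controller sizes below are hypothetical, and the round-up behaviour is an assumption about how the percentage is resolved, not something stated in this diff:

package main

import (
	"fmt"
	"math"
)

func main() {
	// Hypothetical controllers backing the selected pods: scales 3 and 4.
	controllerScale := []int32{3, 4}

	// expectedCount = SUM_{all c in C} scale(c)
	var expectedCount int32
	for _, s := range controllerScale {
		expectedCount += s
	}

	// With minAvailable: "28%", take 28% of expectedCount, rounded up (assumed).
	desiredHealthy := int32(math.Ceil(0.28 * float64(expectedCount)))

	fmt.Println(expectedCount, desiredHealthy) // 7 2
}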
@@ -672,13 +677,19 @@ func (dc *DisruptionController) getExpectedScale(pdb *policy.PodDisruptionBudget
 	// A mapping from controllers to their scale.
 	controllerScale := map[types.UID]int32{}
 
-	// 1. Find the controller for each pod. If any pod has 0 controllers,
-	// that's an error. With ControllerRef, a pod can only have 1 controller.
+	// 1. Find the controller for each pod.
+
+	// As of now, we allow PDBs to be applied to pods via selectors, so there
+	// can be unmanaged pods (pods that don't have backing controllers) but still have PDBs associated.
+	// Such pods are to be collected, and the PDB backing them should be enqueued instead of immediately throwing
+	// a sync error. This ensures the disruption controller is not frequently updating the status subresource and thus
+	// preventing excessive and expensive writes to etcd.
+	// With ControllerRef, a pod can only have 1 controller.
 	for _, pod := range pods {
 		controllerRef := metav1.GetControllerOf(pod)
 		if controllerRef == nil {
-			err = fmt.Errorf("found no controller ref for pod %q", pod.Name)
-			return
+			unmanagedPods = append(unmanagedPods, pod.Name)
+			continue
 		}
 
 		// If we already know the scale of the controller there is no need to do anything.
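A minimal sketch of the distinction the loop above now relies on: a pod is treated as unmanaged when it carries no ownerReference with controller: true, so metav1.GetControllerOf returns nil and the pod's name is collected instead of failing the sync. Pod and owner names are illustrative only:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	isController := true

	// Pod owned by a (hypothetical) ReplicaSet: GetControllerOf returns the owner reference.
	managed := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name: "web-abc12",
		OwnerReferences: []metav1.OwnerReference{{
			APIVersion: "apps/v1",
			Kind:       "ReplicaSet",
			Name:       "web",
			Controller: &isController,
		}},
	}}

	// Bare pod created directly: no ownerReferences, so GetControllerOf returns nil.
	unmanaged := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "debug-pod"}}

	var unmanagedPods []string
	for _, pod := range []*v1.Pod{managed, unmanaged} {
		if metav1.GetControllerOf(pod) == nil {
			unmanagedPods = append(unmanagedPods, pod.Name)
		}
	}
	fmt.Println(unmanagedPods) // [debug-pod]
}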
@@ -22,6 +22,7 @@ import (
 	"fmt"
 	"os"
 	"runtime/debug"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@@ -115,6 +116,15 @@ func (ps *pdbStates) VerifyDisruptionAllowed(t *testing.T, key string, disruptio
 	}
 }
 
+func (ps *pdbStates) VerifyNoStatusError(t *testing.T, key string) {
+	pdb := ps.Get(key)
+	for _, condition := range pdb.Status.Conditions {
+		if strings.Contains(condition.Message, "found no controller ref") && condition.Reason == policy.SyncFailedReason {
+			t.Fatalf("PodDisruption Controller should not error when unmanaged pods are found but it failed for %q", key)
+		}
+	}
+}
+
 type disruptionController struct {
 	*DisruptionController
@@ -534,6 +544,47 @@ func TestNakedPod(t *testing.T) {
 	ps.VerifyDisruptionAllowed(t, pdbName, 0)
 }
 
+// Verify that disruption controller is not erroring when unmanaged pods are found
+func TestStatusForUnmanagedPod(t *testing.T) {
+	dc, ps := newFakeDisruptionController()
+
+	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
+	add(t, dc.pdbStore, pdb)
+	dc.sync(pdbName)
+	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
+	ps.VerifyDisruptionAllowed(t, pdbName, 0)
+
+	pod, _ := newPod(t, "unmanaged")
+	add(t, dc.podStore, pod)
+	dc.sync(pdbName)
+
+	ps.VerifyNoStatusError(t, pdbName)
+
+}
+
+// Check if the unmanaged pods are correctly collected or not
+func TestTotalUnmanagedPods(t *testing.T) {
+	dc, ps := newFakeDisruptionController()
+
+	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
+	add(t, dc.pdbStore, pdb)
+	dc.sync(pdbName)
+	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
+	ps.VerifyDisruptionAllowed(t, pdbName, 0)
+
+	pod, _ := newPod(t, "unmanaged")
+	add(t, dc.podStore, pod)
+	dc.sync(pdbName)
+	var pods []*v1.Pod
+	pods = append(pods, pod)
+	_, unmanagedPods, _ := dc.getExpectedScale(pdb, pods)
+	if len(unmanagedPods) != 1 {
+		t.Fatalf("expected one pod to be unmanaged pod but found %d", len(unmanagedPods))
+	}
+	ps.VerifyNoStatusError(t, pdbName)
+
+}
+
 // Verify that we count the scale of a ReplicaSet even when it has no Deployment.
 func TestReplicaSet(t *testing.T) {
 	dc, ps := newFakeDisruptionController()
@@ -752,7 +803,7 @@ func TestReplicationController(t *testing.T) {
 	rogue, _ := newPod(t, "rogue")
 	add(t, dc.podStore, rogue)
 	dc.sync(pdbName)
-	ps.VerifyDisruptionAllowed(t, pdbName, 0)
+	ps.VerifyDisruptionAllowed(t, pdbName, 2)
 }
 
 func TestStatefulSetController(t *testing.T) {
@@ -19,6 +19,8 @@ package apps
 import (
 	"context"
 	"fmt"
+	"github.com/onsi/gomega"
+	"strings"
 	"time"
 
 	jsonpatch "github.com/evanphx/json-patch"
@@ -185,6 +187,25 @@ var _ = SIGDescribe("DisruptionController", func() {
 		framework.ExpectEmpty(patched.Status.DisruptedPods, "Expecting the PodDisruptionBudget's be empty")
 	})
 
+	// PDB shouldn't error out when there are unmanaged pods
+	ginkgo.It("should observe that the PodDisruptionBudget status is not updated for unmanaged pods",
+		func() {
+			createPDBMinAvailableOrDie(cs, ns, defaultName, intstr.FromInt(1), defaultLabels)
+
+			createPodsOrDie(cs, ns, 3)
+			waitForPodsOrDie(cs, ns, 3)
+
+			// Since we allow unmanaged pods to be associated with a PDB, we should not see any error
+			gomega.Consistently(func() (bool, error) {
+				pdb, err := cs.PolicyV1().PodDisruptionBudgets(ns).Get(context.TODO(), defaultName, metav1.GetOptions{})
+				if err != nil {
+					return false, err
+				}
+				return isPDBErroring(pdb), nil
+			}, 1*time.Minute, 1*time.Second).ShouldNot(gomega.BeTrue(), "pod shouldn't error for "+
+				"unmanaged pod")
+		})
+
 	evictionCases := []struct {
 		description string
 		minAvailable intstr.IntOrString
@@ -677,3 +698,15 @@ func unstructuredToPDB(obj *unstructured.Unstructured) (*policyv1.PodDisruptionB
 	pdb.APIVersion = ""
 	return pdb, err
 }
+
+// isPDBErroring checks if the PDB is erroring when there are unmanaged pods
+func isPDBErroring(pdb *policyv1.PodDisruptionBudget) bool {
+	hasFailed := false
+	for _, condition := range pdb.Status.Conditions {
+		if strings.Contains(condition.Reason, "SyncFailed") &&
+			strings.Contains(condition.Message, "found no controller ref for pod") {
+			hasFailed = true
+		}
+	}
+	return hasFailed
+}
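For reference, a sketch of the kind of status condition this helper guards against; the condition values below are illustrative of what a failed sync would have written before this change, not copied from the diff:

package main

import (
	"fmt"
	"strings"

	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Illustrative PDB whose status carries the SyncFailed condition that
	// unmanaged pods used to trigger.
	pdb := &policyv1.PodDisruptionBudget{
		Status: policyv1.PodDisruptionBudgetStatus{
			Conditions: []metav1.Condition{{
				Type:    policyv1.DisruptionAllowedCondition,
				Status:  metav1.ConditionFalse,
				Reason:  policyv1.SyncFailedReason,
				Message: `found no controller ref for pod "unmanaged"`,
			}},
		},
	}

	// Same checks as isPDBErroring above.
	erroring := false
	for _, c := range pdb.Status.Conditions {
		if strings.Contains(c.Reason, "SyncFailed") &&
			strings.Contains(c.Message, "found no controller ref for pod") {
			erroring = true
		}
	}
	fmt.Println(erroring) // true
}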