controller change for statefulset auto-delete (implementation)

This commit is contained in:
Matthew Cary 2021-06-30 10:37:16 -07:00
parent f1d5d4df5a
commit bce87a3e4f
5 changed files with 426 additions and 72 deletions

View File

@ -27,68 +27,115 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errorutils "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
)
// StatefulPodControlInterface defines the interface that StatefulSetController uses to create, update, and delete Pods,
// StatefulPodControlObjectManager abstracts the manipulation of Pods and PVCs. The real controller implements this
// with a clientset for writes and listers for reads; for tests we provide stubs.
type StatefulPodControlObjectManager interface {
CreatePod(ctx context.Context, pod *v1.Pod) error
GetPod(namespace, podName string) (*v1.Pod, error)
UpdatePod(pod *v1.Pod) error
DeletePod(pod *v1.Pod) error
CreateClaim(claim *v1.PersistentVolumeClaim) error
GetClaim(namespace, claimName string) (*v1.PersistentVolumeClaim, error)
UpdateClaim(claim *v1.PersistentVolumeClaim) error
}
// StatefulPodControl defines the interface that StatefulSetController uses to create, update, and delete Pods,
// and to update the Status of a StatefulSet. It follows the design paradigms used for PodControl, but its
// implementation provides for PVC creation, ordered Pod creation, ordered Pod termination, and Pod identity enforcement.
// Like controller.PodControlInterface, it is implemented as an interface to provide for testing fakes.
type StatefulPodControlInterface interface {
// CreateStatefulPod create a Pod in a StatefulSet. Any PVCs necessary for the Pod are created prior to creating
// the Pod. If the returned error is nil the Pod and its PVCs have been created.
CreateStatefulPod(ctx context.Context, set *apps.StatefulSet, pod *v1.Pod) error
// UpdateStatefulPod Updates a Pod in a StatefulSet. If the Pod already has the correct identity and stable
// storage this method is a no-op. If the Pod must be mutated to conform to the Set, it is mutated and updated.
// pod is an in-out parameter, and any updates made to the pod are reflected as mutations to this parameter. If
// the create is successful, the returned error is nil.
UpdateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error
// DeleteStatefulPod deletes a Pod in a StatefulSet. The pods PVCs are not deleted. If the delete is successful,
// the returned error is nil.
DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error
}
func NewRealStatefulPodControl(
client clientset.Interface,
setLister appslisters.StatefulSetLister,
podLister corelisters.PodLister,
pvcLister corelisters.PersistentVolumeClaimLister,
recorder record.EventRecorder,
) StatefulPodControlInterface {
return &realStatefulPodControl{client, setLister, podLister, pvcLister, recorder}
}
// realStatefulPodControl implements StatefulPodControlInterface using a clientset.Interface to communicate with the
// API server. The struct is package private as the internal details are irrelevant to importing packages.
type realStatefulPodControl struct {
client clientset.Interface
setLister appslisters.StatefulSetLister
podLister corelisters.PodLister
pvcLister corelisters.PersistentVolumeClaimLister
// Manipulation of objects is provided through objectMgr, which allows the k8s API to be mocked out for testing.
type StatefulPodControl struct {
objectMgr StatefulPodControlObjectManager
recorder record.EventRecorder
}
func (spc *realStatefulPodControl) CreateStatefulPod(ctx context.Context, set *apps.StatefulSet, pod *v1.Pod) error {
// NewStatefulPodControl constructs a StatefulPodControl using a realStatefulPodControlObjectManager with the given
// clientset, listers and EventRecorder.
func NewStatefulPodControl(
client clientset.Interface,
podLister corelisters.PodLister,
claimLister corelisters.PersistentVolumeClaimLister,
recorder record.EventRecorder,
) *StatefulPodControl {
return &StatefulPodControl{&realStatefulPodControlObjectManager{client, podLister, claimLister}, recorder}
}
// NewStatefulPodControlFromManager creates a StatefulPodControl using the given StatefulPodControlObjectManager and recorder.
func NewStatefulPodControlFromManager(om StatefulPodControlObjectManager, recorder record.EventRecorder) *StatefulPodControl {
return &StatefulPodControl{om, recorder}
}
// realStatefulPodControlObjectManager uses a clientset.Interface and listers.
type realStatefulPodControlObjectManager struct {
client clientset.Interface
podLister corelisters.PodLister
claimLister corelisters.PersistentVolumeClaimLister
}
func (om *realStatefulPodControlObjectManager) CreatePod(ctx context.Context, pod *v1.Pod) error {
_, err := om.client.CoreV1().Pods(pod.Namespace).Create(ctx, pod, metav1.CreateOptions{})
return err
}
func (om *realStatefulPodControlObjectManager) GetPod(namespace, podName string) (*v1.Pod, error) {
return om.podLister.Pods(namespace).Get(podName)
}
func (om *realStatefulPodControlObjectManager) UpdatePod(pod *v1.Pod) error {
_, err := om.client.CoreV1().Pods(pod.Namespace).Update(context.TODO(), pod, metav1.UpdateOptions{})
return err
}
func (om *realStatefulPodControlObjectManager) DeletePod(pod *v1.Pod) error {
return om.client.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
}
func (om *realStatefulPodControlObjectManager) CreateClaim(claim *v1.PersistentVolumeClaim) error {
_, err := om.client.CoreV1().PersistentVolumeClaims(claim.Namespace).Create(context.TODO(), claim, metav1.CreateOptions{})
return err
}
func (om *realStatefulPodControlObjectManager) GetClaim(namespace, claimName string) (*v1.PersistentVolumeClaim, error) {
return om.claimLister.PersistentVolumeClaims(namespace).Get(claimName)
}
func (om *realStatefulPodControlObjectManager) UpdateClaim(claim *v1.PersistentVolumeClaim) error {
_, err := om.client.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(context.TODO(), claim, metav1.UpdateOptions{})
return err
}
func (spc *StatefulPodControl) CreateStatefulPod(ctx context.Context, set *apps.StatefulSet, pod *v1.Pod) error {
// Create the Pod's PVCs prior to creating the Pod
if err := spc.createPersistentVolumeClaims(set, pod); err != nil {
spc.recordPodEvent("create", set, pod, err)
return err
}
// If we created the PVCs attempt to create the Pod
_, err := spc.client.CoreV1().Pods(set.Namespace).Create(ctx, pod, metav1.CreateOptions{})
err := spc.objectMgr.CreatePod(ctx, pod)
// sink already exists errors
if apierrors.IsAlreadyExists(err) {
return err
}
if utilfeature.DefaultFeatureGate.Enabled(features.StatefulSetAutoDeletePVC) {
// Set PVC policy as much as is possible at this point.
if err := spc.UpdatePodClaimForRetentionPolicy(set, pod); err != nil {
spc.recordPodEvent("update", set, pod, err)
return err
}
}
spc.recordPodEvent("create", set, pod, err)
return err
}
func (spc *realStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error {
func (spc *StatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error {
attemptedUpdate := false
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
// assume the Pod is consistent
@ -108,6 +155,21 @@ func (spc *realStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod
return err
}
}
if utilfeature.DefaultFeatureGate.Enabled(features.StatefulSetAutoDeletePVC) {
// if the Pod's PVCs are not consistent with the StatefulSet's PVC deletion policy, update the PVC
// and dirty the pod.
if match, err := spc.ClaimsMatchRetentionPolicy(set, pod); err != nil {
spc.recordPodEvent("update", set, pod, err)
return err
} else if !match {
if err := spc.UpdatePodClaimForRetentionPolicy(set, pod); err != nil {
spc.recordPodEvent("update", set, pod, err)
return err
}
consistent = false
}
}
// if the Pod is not dirty, do nothing
if consistent {
return nil
@ -115,16 +177,17 @@ func (spc *realStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod
attemptedUpdate = true
// commit the update, retrying on conflicts
_, updateErr := spc.client.CoreV1().Pods(set.Namespace).Update(context.TODO(), pod, metav1.UpdateOptions{})
updateErr := spc.objectMgr.UpdatePod(pod)
if updateErr == nil {
return nil
}
if updated, err := spc.podLister.Pods(set.Namespace).Get(pod.Name); err == nil {
if updated, err := spc.objectMgr.GetPod(set.Namespace, pod.Name); err == nil {
// make a copy so we don't mutate the shared cache
pod = updated.DeepCopy()
} else {
utilruntime.HandleError(fmt.Errorf("error getting updated Pod %s/%s from lister: %v", set.Namespace, pod.Name, err))
utilruntime.HandleError(fmt.Errorf("error getting updated Pod %s/%s: %w", set.Namespace, pod.Name, err))
}
return updateErr
@ -135,15 +198,92 @@ func (spc *realStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod
return err
}
func (spc *realStatefulPodControl) DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error {
err := spc.client.CoreV1().Pods(set.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
func (spc *StatefulPodControl) DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error {
err := spc.objectMgr.DeletePod(pod)
spc.recordPodEvent("delete", set, pod, err)
return err
}
// ClaimsMatchRetentionPolicy returns false if the PVCs for pod are not consistent with set's PVC deletion policy.
// An error is returned if something is not consistent. This is expected if the pod is being otherwise updated,
// but a problem otherwise (see usage of this method in UpdateStatefulPod).
func (spc *StatefulPodControl) ClaimsMatchRetentionPolicy(set *apps.StatefulSet, pod *v1.Pod) (bool, error) {
ordinal := getOrdinal(pod)
templates := set.Spec.VolumeClaimTemplates
for i := range templates {
claimName := getPersistentVolumeClaimName(set, &templates[i], ordinal)
claim, err := spc.objectMgr.GetClaim(set.Namespace, claimName)
switch {
case apierrors.IsNotFound(err):
klog.V(4).Infof("Expected claim %s missing, continuing to pick up in next iteration", claimName)
case err != nil:
return false, fmt.Errorf("Could not retrieve claim %s for %s when checking PVC deletion policy", claimName, pod.Name)
default:
if !claimOwnerMatchesSetAndPod(claim, set, pod) {
return false, nil
}
}
}
return true, nil
}
// UpdatePodClaimForRetentionPolicy updates the PVCs used by pod to match the PVC deletion policy of set.
func (spc *StatefulPodControl) UpdatePodClaimForRetentionPolicy(set *apps.StatefulSet, pod *v1.Pod) error {
ordinal := getOrdinal(pod)
templates := set.Spec.VolumeClaimTemplates
for i := range templates {
claimName := getPersistentVolumeClaimName(set, &templates[i], ordinal)
claim, err := spc.objectMgr.GetClaim(set.Namespace, claimName)
switch {
case apierrors.IsNotFound(err):
klog.V(4).Infof("Expected claim %s missing, continuing to pick up in next iteration.")
case err != nil:
return fmt.Errorf("Could not retrieve claim %s not found for %s when checking PVC deletion policy: %w", claimName, pod.Name, err)
default:
if !claimOwnerMatchesSetAndPod(claim, set, pod) {
needsUpdate := updateClaimOwnerRefForSetAndPod(claim, set, pod)
if needsUpdate {
err := spc.objectMgr.UpdateClaim(claim)
if err != nil {
return fmt.Errorf("Could not update claim %s for delete policy ownerRefs: %w", claimName, err)
}
}
}
}
}
return nil
}
// PodClaimIsStale returns true for a stale PVC that should block pod creation. If the scaling
// policy is deletion, and a PVC has an ownerRef that does not match the pod, the PVC is stale. This
// includes pods whose UID has not been created.
func (spc *StatefulPodControl) PodClaimIsStale(set *apps.StatefulSet, pod *v1.Pod) (bool, error) {
policy := getPersistentVolumeClaimRetentionPolicy(set)
if policy.WhenScaled == apps.RetainPersistentVolumeClaimRetentionPolicyType {
// PVCs are meant to be reused and so can't be stale.
return false, nil
}
for _, claim := range getPersistentVolumeClaims(set, pod) {
pvc, err := spc.objectMgr.GetClaim(claim.Namespace, claim.Name)
switch {
case apierrors.IsNotFound(err):
// If the claim doesn't exist yet, it can't be stale.
continue
case err != nil:
return false, err
case err == nil:
// A claim is stale if it doesn't match the pod's UID, including if the pod has no UID.
if hasStaleOwnerRef(pvc, pod) {
return true, nil
}
}
}
return false, nil
}
// recordPodEvent records an event for verb applied to a Pod in a StatefulSet. If err is nil the generated event will
// have a reason of v1.EventTypeNormal. If err is not nil the generated event will have a reason of v1.EventTypeWarning.
func (spc *realStatefulPodControl) recordPodEvent(verb string, set *apps.StatefulSet, pod *v1.Pod, err error) {
func (spc *StatefulPodControl) recordPodEvent(verb string, set *apps.StatefulSet, pod *v1.Pod, err error) {
if err == nil {
reason := fmt.Sprintf("Successful%s", strings.Title(verb))
message := fmt.Sprintf("%s Pod %s in StatefulSet %s successful",
@ -160,7 +300,7 @@ func (spc *realStatefulPodControl) recordPodEvent(verb string, set *apps.Statefu
// recordClaimEvent records an event for verb applied to the PersistentVolumeClaim of a Pod in a StatefulSet. If err is
// nil the generated event will have a reason of v1.EventTypeNormal. If err is not nil the generated event will have a
// reason of v1.EventTypeWarning.
func (spc *realStatefulPodControl) recordClaimEvent(verb string, set *apps.StatefulSet, pod *v1.Pod, claim *v1.PersistentVolumeClaim, err error) {
func (spc *StatefulPodControl) recordClaimEvent(verb string, set *apps.StatefulSet, pod *v1.Pod, claim *v1.PersistentVolumeClaim, err error) {
if err == nil {
reason := fmt.Sprintf("Successful%s", strings.Title(verb))
message := fmt.Sprintf("%s Claim %s Pod %s in StatefulSet %s success",
@ -178,13 +318,13 @@ func (spc *realStatefulPodControl) recordClaimEvent(verb string, set *apps.State
// set. If all of the claims for Pod are successfully created, the returned error is nil. If creation fails, this method
// may be called again until no error is returned, indicating the PersistentVolumeClaims for pod are consistent with
// set's Spec.
func (spc *realStatefulPodControl) createPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error {
func (spc *StatefulPodControl) createPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error {
var errs []error
for _, claim := range getPersistentVolumeClaims(set, pod) {
pvc, err := spc.pvcLister.PersistentVolumeClaims(claim.Namespace).Get(claim.Name)
pvc, err := spc.objectMgr.GetClaim(claim.Namespace, claim.Name)
switch {
case apierrors.IsNotFound(err):
_, err := spc.client.CoreV1().PersistentVolumeClaims(claim.Namespace).Create(context.TODO(), &claim, metav1.CreateOptions{})
err := spc.objectMgr.CreateClaim(&claim)
if err != nil {
errs = append(errs, fmt.Errorf("failed to create PVC %s: %s", claim.Name, err))
}
@ -203,5 +343,3 @@ func (spc *realStatefulPodControl) createPersistentVolumeClaims(set *apps.Statef
}
return errorutils.NewAggregate(errs)
}
var _ StatefulPodControlInterface = &realStatefulPodControl{}

View File

@ -91,9 +91,8 @@ func NewStatefulSetController(
ssc := &StatefulSetController{
kubeClient: kubeClient,
control: NewDefaultStatefulSetControl(
NewRealStatefulPodControl(
NewStatefulPodControl(
kubeClient,
setInformer.Lister(),
podInformer.Lister(),
pvcInformer.Lister(),
recorder),
@ -352,10 +351,14 @@ func (ssc *StatefulSetController) getStatefulSetsForPod(pod *v1.Pod) []*apps.Sta
if len(sets) > 1 {
// ControllerRef will ensure we don't do anything crazy, but more than one
// item in this list nevertheless constitutes user error.
setNames := []string{}
for _, s := range sets {
setNames = append(setNames, s.Name)
}
utilruntime.HandleError(
fmt.Errorf(
"user error: more than one StatefulSet is selecting pods with labels: %+v",
pod.Labels))
"user error: more than one StatefulSet is selecting pods with labels: %+v. Sets: %v",
pod.Labels, setNames))
}
return sets
}

View File

@ -54,7 +54,7 @@ type StatefulSetControlInterface interface {
// to update the status of StatefulSets. You should use an instance returned from NewRealStatefulPodControl() for any
// scenario other than testing.
func NewDefaultStatefulSetControl(
podControl StatefulPodControlInterface,
podControl *StatefulPodControl,
statusUpdater StatefulSetStatusUpdaterInterface,
controllerHistory history.Interface,
recorder record.EventRecorder) StatefulSetControlInterface {
@ -62,7 +62,7 @@ func NewDefaultStatefulSetControl(
}
type defaultStatefulSetControl struct {
podControl StatefulPodControlInterface
podControl *StatefulPodControl
statusUpdater StatefulSetStatusUpdaterInterface
controllerHistory history.Interface
recorder record.EventRecorder
@ -333,7 +333,6 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet(
// if the ordinal of the pod is within the range of the current number of replicas,
// insert it at the indirection of its ordinal
replicas[ord] = pods[i]
} else if ord >= replicaCount {
// if the ordinal is greater than the number of replicas add it to the condemned list
condemned = append(condemned, pods[i])
@ -418,6 +417,14 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet(
}
// If we find a Pod that has not been created we create the Pod
if !isCreated(replicas[i]) {
if utilfeature.DefaultFeatureGate.Enabled(features.StatefulSetAutoDeletePVC) {
if isStale, err := ssc.podControl.PodClaimIsStale(set, replicas[i]); err != nil {
return &status, err
} else if isStale {
// If a pod has a stale PVC, no more work can be done this round.
return &status, err
}
}
if err := ssc.podControl.CreateStatefulPod(ctx, set, replicas[i]); err != nil {
return &status, err
}
@ -428,7 +435,6 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet(
if getPodRevision(replicas[i]) == updateRevision.Name {
status.UpdatedReplicas++
}
// if the set does not allow bursting, return immediately
if monotonic {
return &status, nil
@ -471,7 +477,16 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet(
return &status, nil
}
// Enforce the StatefulSet invariants
if identityMatches(set, replicas[i]) && storageMatches(set, replicas[i]) {
retentionMatch := true
if utilfeature.DefaultFeatureGate.Enabled(features.StatefulSetAutoDeletePVC) {
var err error
retentionMatch, err = ssc.podControl.ClaimsMatchRetentionPolicy(updateSet, replicas[i])
// An error is expected if the pod is not yet fully updated, and so return is treated as matching.
if err != nil {
retentionMatch = true
}
}
if identityMatches(set, replicas[i]) && storageMatches(set, replicas[i]) && retentionMatch {
continue
}
// Make a deep copy so we don't mutate the shared cache
@ -481,6 +496,19 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet(
}
}
if utilfeature.DefaultFeatureGate.Enabled(features.StatefulSetAutoDeletePVC) {
// Ensure ownerRefs are set correctly for the condemned pods.
for i := range condemned {
if matchPolicy, err := ssc.podControl.ClaimsMatchRetentionPolicy(updateSet, condemned[i]); err != nil {
return &status, err
} else if !matchPolicy {
if err := ssc.podControl.UpdatePodClaimForRetentionPolicy(updateSet, condemned[i]); err != nil {
return &status, err
}
}
}
}
// At this point, all of the current Replicas are Running, Ready and Available, we can consider termination.
// We will wait for all predecessors to be Running and Ready prior to attempting a deletion.
// We will terminate Pods in a monotonically decreasing order over [len(pods),set.Spec.Replicas).

View File

@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/history"
@ -132,6 +133,179 @@ func storageMatches(set *apps.StatefulSet, pod *v1.Pod) bool {
return true
}
// getPersistentVolumeClaimPolicy returns the PVC policy for a StatefulSet, returning a retain policy if the set policy is nil.
func getPersistentVolumeClaimRetentionPolicy(set *apps.StatefulSet) apps.StatefulSetPersistentVolumeClaimRetentionPolicy {
policy := apps.StatefulSetPersistentVolumeClaimRetentionPolicy{
WhenDeleted: apps.RetainPersistentVolumeClaimRetentionPolicyType,
WhenScaled: apps.RetainPersistentVolumeClaimRetentionPolicyType,
}
if set.Spec.PersistentVolumeClaimRetentionPolicy != nil {
policy = *set.Spec.PersistentVolumeClaimRetentionPolicy
}
return policy
}
// claimOwnerMatchesSetAndPod returns false if the ownerRefs of the claim are not set consistently with the
// PVC deletion policy for the StatefulSet.
func claimOwnerMatchesSetAndPod(claim *v1.PersistentVolumeClaim, set *apps.StatefulSet, pod *v1.Pod) bool {
policy := set.Spec.PersistentVolumeClaimRetentionPolicy
const retain = apps.RetainPersistentVolumeClaimRetentionPolicyType
const delete = apps.DeletePersistentVolumeClaimRetentionPolicyType
switch {
default:
klog.Errorf("Unknown policy %v; treating as Retain", set.Spec.PersistentVolumeClaimRetentionPolicy)
fallthrough
case policy.WhenScaled == retain && policy.WhenDeleted == retain:
if hasOwnerRef(claim, set) ||
hasOwnerRef(claim, pod) {
return false
}
case policy.WhenScaled == retain && policy.WhenDeleted == delete:
if !hasOwnerRef(claim, set) ||
hasOwnerRef(claim, pod) {
return false
}
case policy.WhenScaled == delete && policy.WhenDeleted == retain:
if hasOwnerRef(claim, set) {
return false
}
podScaledDown := getOrdinal(pod) >= int(*set.Spec.Replicas)
if podScaledDown != hasOwnerRef(claim, pod) {
return false
}
case policy.WhenScaled == delete && policy.WhenDeleted == delete:
podScaledDown := getOrdinal(pod) >= int(*set.Spec.Replicas)
// If a pod is scaled down, there should be no set ref and a pod ref;
// if the pod is not scaled down it's the other way around.
if podScaledDown == hasOwnerRef(claim, set) {
return false
}
if podScaledDown != hasOwnerRef(claim, pod) {
return false
}
}
return true
}
// updateClaimOwnerRefForSetAndPod updates the ownerRefs for the claim according to the deletion policy of
// the StatefulSet. Returns true if the claim was changed and should be updated and false otherwise.
func updateClaimOwnerRefForSetAndPod(claim *v1.PersistentVolumeClaim, set *apps.StatefulSet, pod *v1.Pod) bool {
needsUpdate := false
// Sometimes the version and kind are not set {pod,set}.TypeMeta. These are necessary for the ownerRef.
// This is the case both in real clusters and the unittests.
// TODO: there must be a better way to do this other than hardcoding the pod version?
updateMeta := func(tm *metav1.TypeMeta, kind string) {
if tm.APIVersion == "" {
if kind == "StatefulSet" {
tm.APIVersion = "apps/v1"
} else {
tm.APIVersion = "v1"
}
}
if tm.Kind == "" {
tm.Kind = kind
}
}
podMeta := pod.TypeMeta
updateMeta(&podMeta, "Pod")
setMeta := set.TypeMeta
updateMeta(&setMeta, "StatefulSet")
policy := set.Spec.PersistentVolumeClaimRetentionPolicy
const retain = apps.RetainPersistentVolumeClaimRetentionPolicyType
const delete = apps.DeletePersistentVolumeClaimRetentionPolicyType
switch {
default:
klog.Errorf("Unknown policy %v, treating as Retain", set.Spec.PersistentVolumeClaimRetentionPolicy)
fallthrough
case policy.WhenScaled == retain && policy.WhenDeleted == retain:
needsUpdate = removeOwnerRef(claim, set) || needsUpdate
needsUpdate = removeOwnerRef(claim, pod) || needsUpdate
case policy.WhenScaled == retain && policy.WhenDeleted == delete:
needsUpdate = setOwnerRef(claim, set, &setMeta) || needsUpdate
needsUpdate = removeOwnerRef(claim, pod) || needsUpdate
case policy.WhenScaled == delete && policy.WhenDeleted == retain:
needsUpdate = removeOwnerRef(claim, set) || needsUpdate
podScaledDown := getOrdinal(pod) >= int(*set.Spec.Replicas)
if podScaledDown {
needsUpdate = setOwnerRef(claim, pod, &podMeta) || needsUpdate
}
if !podScaledDown {
needsUpdate = removeOwnerRef(claim, pod) || needsUpdate
}
case policy.WhenScaled == delete && policy.WhenDeleted == delete:
podScaledDown := getOrdinal(pod) >= int(*set.Spec.Replicas)
if podScaledDown {
needsUpdate = removeOwnerRef(claim, set) || needsUpdate
needsUpdate = setOwnerRef(claim, pod, &podMeta) || needsUpdate
}
if !podScaledDown {
needsUpdate = setOwnerRef(claim, set, &setMeta) || needsUpdate
needsUpdate = removeOwnerRef(claim, pod) || needsUpdate
}
}
return needsUpdate
}
// hasOwnerRef returns true if target has an ownerRef to owner.
func hasOwnerRef(target, owner metav1.Object) bool {
ownerUID := owner.GetUID()
for _, ownerRef := range target.GetOwnerReferences() {
if ownerRef.UID == ownerUID {
return true
}
}
return false
}
// hasStaleOwnerRef returns true if target has a ref to owner that appears to be stale.
func hasStaleOwnerRef(target, owner metav1.Object) bool {
for _, ownerRef := range target.GetOwnerReferences() {
if ownerRef.Name == owner.GetName() && ownerRef.UID != owner.GetUID() {
return true
}
}
return false
}
// setOwnerRef adds owner to the ownerRefs of target, if necessary. Returns true if target needs to be
// updated and false otherwise.
func setOwnerRef(target, owner metav1.Object, ownerType *metav1.TypeMeta) bool {
if hasOwnerRef(target, owner) {
return false
}
ownerRefs := append(
target.GetOwnerReferences(),
metav1.OwnerReference{
APIVersion: ownerType.APIVersion,
Kind: ownerType.Kind,
Name: owner.GetName(),
UID: owner.GetUID(),
})
target.SetOwnerReferences(ownerRefs)
return true
}
// removeOwnerRef removes owner from the ownerRefs of target, if necessary. Returns true if target needs
// to be updated and false otherwise.
func removeOwnerRef(target, owner metav1.Object) bool {
if !hasOwnerRef(target, owner) {
return false
}
ownerUID := owner.GetUID()
oldRefs := target.GetOwnerReferences()
newRefs := make([]metav1.OwnerReference, len(oldRefs)-1)
skip := 0
for i := range oldRefs {
if oldRefs[i].UID == ownerUID {
skip = -1
} else {
newRefs[i+skip] = oldRefs[i]
}
}
target.SetOwnerReferences(newRefs)
return true
}
// getPersistentVolumeClaims gets a map of PersistentVolumeClaims to their template names, as defined in set. The
// returned PersistentVolumeClaims are each constructed with a the name specific to the Pod. This name is determined
// by getPersistentVolumeClaimName.
@ -278,7 +452,10 @@ func newVersionedStatefulSetPod(currentSet, updateSet *apps.StatefulSet, current
// Match check if the given StatefulSet's template matches the template stored in the given history.
func Match(ss *apps.StatefulSet, history *apps.ControllerRevision) (bool, error) {
patch, err := getPatch(ss)
// Encoding the set for the patch may update its GVK metadata, which causes data races if this
// set is in an informer cache.
clone := ss.DeepCopy()
patch, err := getPatch(clone)
if err != nil {
return false, err
}

View File

@ -339,19 +339,27 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
eventsRule(),
},
})
addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "statefulset-controller"},
Rules: []rbacv1.PolicyRule{
rbacv1helpers.NewRule("list", "watch").Groups(legacyGroup).Resources("pods").RuleOrDie(),
rbacv1helpers.NewRule("get", "list", "watch").Groups(appsGroup).Resources("statefulsets").RuleOrDie(),
rbacv1helpers.NewRule("update").Groups(appsGroup).Resources("statefulsets/status").RuleOrDie(),
rbacv1helpers.NewRule("update").Groups(appsGroup).Resources("statefulsets/finalizers").RuleOrDie(),
rbacv1helpers.NewRule("get", "create", "delete", "update", "patch").Groups(legacyGroup).Resources("pods").RuleOrDie(),
rbacv1helpers.NewRule("get", "create", "delete", "update", "patch", "list", "watch").Groups(appsGroup).Resources("controllerrevisions").RuleOrDie(),
rbacv1helpers.NewRule("get", "create").Groups(legacyGroup).Resources("persistentvolumeclaims").RuleOrDie(),
eventsRule(),
},
})
addControllerRole(&controllerRoles, &controllerRoleBindings, func() rbacv1.ClusterRole {
role := rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "statefulset-controller"},
Rules: []rbacv1.PolicyRule{
rbacv1helpers.NewRule("list", "watch").Groups(legacyGroup).Resources("pods").RuleOrDie(),
rbacv1helpers.NewRule("get", "list", "watch").Groups(appsGroup).Resources("statefulsets").RuleOrDie(),
rbacv1helpers.NewRule("update").Groups(appsGroup).Resources("statefulsets/status").RuleOrDie(),
rbacv1helpers.NewRule("update").Groups(appsGroup).Resources("statefulsets/finalizers").RuleOrDie(),
rbacv1helpers.NewRule("get", "create", "delete", "update", "patch").Groups(legacyGroup).Resources("pods").RuleOrDie(),
rbacv1helpers.NewRule("get", "create", "delete", "update", "patch", "list", "watch").Groups(appsGroup).Resources("controllerrevisions").RuleOrDie(),
rbacv1helpers.NewRule("get", "create").Groups(legacyGroup).Resources("persistentvolumeclaims").RuleOrDie(),
eventsRule(),
},
}
if utilfeature.DefaultFeatureGate.Enabled(features.StatefulSetAutoDeletePVC) {
role.Rules = append(role.Rules, rbacv1helpers.NewRule("update").Groups(legacyGroup).Resources("persistentvolumeclaims").RuleOrDie())
}
return role
}())
addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "ttl-controller"},
Rules: []rbacv1.PolicyRule{