Implement pod deletion cost

This commit is contained in:
Abdullah Gharaibeh 2021-02-17 15:39:42 -05:00
parent 35c233f18d
commit d7e80ab038
15 changed files with 292 additions and 86 deletions

View File

@ -345,29 +345,34 @@ func usesMultipleHugePageResources(podSpec *api.PodSpec) bool {
return len(hugePageResources) > 1
}
// GetValidationOptionsFromPodSpec returns validation options based on pod specs
func GetValidationOptionsFromPodSpec(podSpec, oldPodSpec *api.PodSpec) apivalidation.PodValidationOptions {
// GetValidationOptionsFromPodSpecAndMeta returns validation options based on pod specs and metadata
func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, podMeta, oldPodMeta *metav1.ObjectMeta) apivalidation.PodValidationOptions {
// default pod validation options based on feature gate
opts := validation.PodValidationOptions{
// Allow multiple huge pages on pod create if feature is enabled
AllowMultipleHugePageResources: utilfeature.DefaultFeatureGate.Enabled(features.HugePageStorageMediumSize),
// Allow pod spec to use hugepages in downward API if feature is enabled
AllowDownwardAPIHugePages: utilfeature.DefaultFeatureGate.Enabled(features.DownwardAPIHugePages),
AllowDownwardAPIHugePages: utilfeature.DefaultFeatureGate.Enabled(features.DownwardAPIHugePages),
AllowInvalidPodDeletionCost: !utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost),
}
// if we are not doing an update operation, just return with default options
if oldPodSpec == nil {
return opts
if oldPodSpec != nil {
// if old spec used multiple huge page sizes, we must allow it
opts.AllowMultipleHugePageResources = opts.AllowMultipleHugePageResources || usesMultipleHugePageResources(oldPodSpec)
// if old spec used hugepages in downward api, we must allow it
opts.AllowDownwardAPIHugePages = opts.AllowDownwardAPIHugePages || usesHugePagesInProjectedVolume(oldPodSpec)
// determine if any container is using hugepages in env var
if !opts.AllowDownwardAPIHugePages {
VisitContainers(oldPodSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool {
opts.AllowDownwardAPIHugePages = opts.AllowDownwardAPIHugePages || usesHugePagesInProjectedEnv(*c)
return !opts.AllowDownwardAPIHugePages
})
}
}
// if old spec used multiple huge page sizes, we must allow it
opts.AllowMultipleHugePageResources = opts.AllowMultipleHugePageResources || usesMultipleHugePageResources(oldPodSpec)
// if old spec used hugepages in downward api, we must allow it
opts.AllowDownwardAPIHugePages = opts.AllowDownwardAPIHugePages || usesHugePagesInProjectedVolume(oldPodSpec)
// determine if any container is using hugepages in env var
if !opts.AllowDownwardAPIHugePages {
VisitContainers(oldPodSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool {
opts.AllowDownwardAPIHugePages = opts.AllowDownwardAPIHugePages || usesHugePagesInProjectedEnv(*c)
return !opts.AllowDownwardAPIHugePages
})
if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost {
// This is an update, so validate only if the existing object was valid.
_, err := helper.GetDeletionCostFromPodAnnotations(oldPodMeta.Annotations)
opts.AllowInvalidPodDeletionCost = err != nil
}
return opts
}
@ -375,15 +380,18 @@ func GetValidationOptionsFromPodSpec(podSpec, oldPodSpec *api.PodSpec) apivalida
// GetValidationOptionsFromPodTemplate will return pod validation options for specified template.
func GetValidationOptionsFromPodTemplate(podTemplate, oldPodTemplate *api.PodTemplateSpec) apivalidation.PodValidationOptions {
var newPodSpec, oldPodSpec *api.PodSpec
var newPodMeta, oldPodMeta *metav1.ObjectMeta
// we have to be careful about nil pointers here
// replication controller in particular is prone to passing nil
if podTemplate != nil {
newPodSpec = &podTemplate.Spec
newPodMeta = &podTemplate.ObjectMeta
}
if oldPodTemplate != nil {
oldPodSpec = &oldPodTemplate.Spec
oldPodMeta = &oldPodTemplate.ObjectMeta
}
return GetValidationOptionsFromPodSpec(newPodSpec, oldPodSpec)
return GetValidationOptionsFromPodSpecAndMeta(newPodSpec, oldPodSpec, newPodMeta, oldPodMeta)
}
// DropDisabledTemplateFields removes disabled fields from the pod template metadata and spec.

View File

@ -32,6 +32,7 @@ import (
"k8s.io/apimachinery/pkg/util/validation/field"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/apis/core"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/features"
)
@ -1328,3 +1329,53 @@ func TestDropEphemeralContainers(t *testing.T) {
}
}
}
func TestValidatePodDeletionCostOption(t *testing.T) {
testCases := []struct {
name string
oldPodMeta *metav1.ObjectMeta
featureEnabled bool
wantAllowInvalidPodDeletionCost bool
}{
{
name: "CreateFeatureEnabled",
featureEnabled: true,
wantAllowInvalidPodDeletionCost: false,
},
{
name: "CreateFeatureDisabled",
featureEnabled: false,
wantAllowInvalidPodDeletionCost: true,
},
{
name: "UpdateFeatureDisabled",
oldPodMeta: &metav1.ObjectMeta{Annotations: map[string]string{core.PodDeletionCost: "100"}},
featureEnabled: false,
wantAllowInvalidPodDeletionCost: true,
},
{
name: "UpdateFeatureEnabledValidOldValue",
oldPodMeta: &metav1.ObjectMeta{Annotations: map[string]string{core.PodDeletionCost: "100"}},
featureEnabled: true,
wantAllowInvalidPodDeletionCost: false,
},
{
name: "UpdateFeatureEnabledValidOldValue",
oldPodMeta: &metav1.ObjectMeta{Annotations: map[string]string{core.PodDeletionCost: "invalid-value"}},
featureEnabled: true,
wantAllowInvalidPodDeletionCost: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDeletionCost, tc.featureEnabled)()
// The new pod doesn't impact the outcome.
gotOptions := GetValidationOptionsFromPodSpecAndMeta(nil, nil, nil, tc.oldPodMeta)
if tc.wantAllowInvalidPodDeletionCost != gotOptions.AllowInvalidPodDeletionCost {
t.Errorf("unexpected diff, want: %v, got: %v", tc.wantAllowInvalidPodDeletionCost, gotOptions.AllowInvalidPodDeletionCost)
}
})
}
}

View File

@ -159,7 +159,7 @@ func TestCompatibility_v1_PodSecurityContext(t *testing.T) {
}
validator := func(obj runtime.Object) field.ErrorList {
opts := podutil.GetValidationOptionsFromPodSpec(&(obj.(*api.Pod).Spec), nil)
opts := podutil.GetValidationOptionsFromPodSpecAndMeta(&(obj.(*api.Pod).Spec), nil, &(obj.(*api.Pod).ObjectMeta), nil)
return validation.ValidatePodSpec(&(obj.(*api.Pod).Spec), &(obj.(*api.Pod).ObjectMeta), field.NewPath("spec"), opts)
}

View File

@ -46,7 +46,7 @@ func ValidateStatefulSetName(name string, prefix bool) []string {
}
// ValidatePodTemplateSpecForStatefulSet validates the given template and ensures that it is in accordance with the desired selector.
func ValidatePodTemplateSpecForStatefulSet(template *api.PodTemplateSpec, selector labels.Selector, fldPath *field.Path) field.ErrorList {
func ValidatePodTemplateSpecForStatefulSet(template *api.PodTemplateSpec, selector labels.Selector, fldPath *field.Path, opts apivalidation.PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
if template == nil {
allErrs = append(allErrs, field.Required(fldPath, ""))
@ -64,13 +64,13 @@ func ValidatePodTemplateSpecForStatefulSet(template *api.PodTemplateSpec, select
// allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(template, fldPath)...)
allErrs = append(allErrs, unversionedvalidation.ValidateLabels(template.Labels, fldPath.Child("labels"))...)
allErrs = append(allErrs, apivalidation.ValidateAnnotations(template.Annotations, fldPath.Child("annotations"))...)
allErrs = append(allErrs, apivalidation.ValidatePodSpecificAnnotations(template.Annotations, &template.Spec, fldPath.Child("annotations"))...)
allErrs = append(allErrs, apivalidation.ValidatePodSpecificAnnotations(template.Annotations, &template.Spec, fldPath.Child("annotations"), opts)...)
}
return allErrs
}
// ValidateStatefulSetSpec tests if required fields in the StatefulSet spec are set.
func ValidateStatefulSetSpec(spec *apps.StatefulSetSpec, fldPath *field.Path) field.ErrorList {
func ValidateStatefulSetSpec(spec *apps.StatefulSetSpec, fldPath *field.Path, opts apivalidation.PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
switch spec.PodManagementPolicy {
@ -122,7 +122,7 @@ func ValidateStatefulSetSpec(spec *apps.StatefulSetSpec, fldPath *field.Path) fi
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("selector"), spec.Selector, ""))
} else {
allErrs = append(allErrs, ValidatePodTemplateSpecForStatefulSet(&spec.Template, selector, fldPath.Child("template"))...)
allErrs = append(allErrs, ValidatePodTemplateSpecForStatefulSet(&spec.Template, selector, fldPath.Child("template"), opts)...)
}
if spec.Template.Spec.RestartPolicy != api.RestartPolicyAlways {
@ -136,9 +136,9 @@ func ValidateStatefulSetSpec(spec *apps.StatefulSetSpec, fldPath *field.Path) fi
}
// ValidateStatefulSet validates a StatefulSet.
func ValidateStatefulSet(statefulSet *apps.StatefulSet) field.ErrorList {
func ValidateStatefulSet(statefulSet *apps.StatefulSet, opts apivalidation.PodValidationOptions) field.ErrorList {
allErrs := apivalidation.ValidateObjectMeta(&statefulSet.ObjectMeta, true, ValidateStatefulSetName, field.NewPath("metadata"))
allErrs = append(allErrs, ValidateStatefulSetSpec(&statefulSet.Spec, field.NewPath("spec"))...)
allErrs = append(allErrs, ValidateStatefulSetSpec(&statefulSet.Spec, field.NewPath("spec"), opts)...)
return allErrs
}

View File

@ -131,7 +131,7 @@ func TestValidateStatefulSet(t *testing.T) {
for i, successCase := range successCases {
t.Run("success case "+strconv.Itoa(i), func(t *testing.T) {
if errs := ValidateStatefulSet(&successCase); len(errs) != 0 {
if errs := ValidateStatefulSet(&successCase, corevalidation.PodValidationOptions{}); len(errs) != 0 {
t.Errorf("expected success: %v", errs)
}
})
@ -356,7 +356,7 @@ func TestValidateStatefulSet(t *testing.T) {
for k, v := range errorCases {
t.Run(k, func(t *testing.T) {
errs := ValidateStatefulSet(&v)
errs := ValidateStatefulSet(&v, corevalidation.PodValidationOptions{})
if len(errs) == 0 {
t.Errorf("expected failure for %s", k)
}

View File

@ -106,4 +106,14 @@ const (
// This annotation is used by the Attach Detach Controller to determine whether to use the in-tree or
// CSI Backend for a volume plugin on a specific node.
MigratedPluginsAnnotationKey = "storage.alpha.kubernetes.io/migrated-plugins"
// PodDeletionCost can be used to set to an int32 that represent the cost of deleting
// a pod compared to other pods belonging to the same ReplicaSet. Pods with lower
// deletion cost are preferred to be deleted before pods with higher deletion cost.
// Note that this is honored on a best-effort basis, and so it does not offer guarantees on
// pod deletion order.
// The implicit deletion cost for pods that don't set the annotation is 0, negative values are permitted.
//
// This annotation is alpha-level and is only honored when PodDeletionCost feature is enabled.
PodDeletionCost = "controller.kubernetes.io/pod-deletion-cost"
)

View File

@ -19,6 +19,7 @@ package helper
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"k8s.io/apimachinery/pkg/api/resource"
@ -532,3 +533,29 @@ func ToPodResourcesSet(podSpec *core.PodSpec) sets.String {
}
return result
}
// GetDeletionCostFromPodAnnotations returns the integer value of pod-deletion-cost. Returns 0
// if not set or the value is invalid.
func GetDeletionCostFromPodAnnotations(annotations map[string]string) (int32, error) {
if value, exist := annotations[core.PodDeletionCost]; exist {
// values that start with plus sign (e.g, "+10") or leading zeros (e.g., "008") are not valid.
if !validFirstDigit(value) {
return 0, fmt.Errorf("invalid value %q", value)
}
i, err := strconv.ParseInt(value, 10, 32)
if err != nil {
// make sure we default to 0 on error.
return 0, err
}
return int32(i), nil
}
return 0, nil
}
func validFirstDigit(str string) bool {
if len(str) == 0 {
return false
}
return str[0] == '-' || (str[0] == '0' && str == "0") || (str[0] >= '1' && str[0] <= '9')
}

View File

@ -131,7 +131,7 @@ func ValidateDNS1123Subdomain(value string, fldPath *field.Path) field.ErrorList
return allErrs
}
func ValidatePodSpecificAnnotations(annotations map[string]string, spec *core.PodSpec, fldPath *field.Path) field.ErrorList {
func ValidatePodSpecificAnnotations(annotations map[string]string, spec *core.PodSpec, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
if value, isMirror := annotations[core.MirrorPodAnnotationKey]; isMirror {
@ -144,6 +144,12 @@ func ValidatePodSpecificAnnotations(annotations map[string]string, spec *core.Po
allErrs = append(allErrs, ValidateTolerationsInPodAnnotations(annotations, fldPath)...)
}
if !opts.AllowInvalidPodDeletionCost {
if _, err := helper.GetDeletionCostFromPodAnnotations(annotations); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Key(core.PodDeletionCost), annotations[core.PodDeletionCost], "must be a 32bit integer"))
}
}
allErrs = append(allErrs, ValidateSeccompPodAnnotations(annotations, fldPath)...)
allErrs = append(allErrs, ValidateAppArmorPodAnnotations(annotations, spec, fldPath)...)
@ -167,7 +173,7 @@ func ValidateTolerationsInPodAnnotations(annotations map[string]string, fldPath
return allErrs
}
func ValidatePodSpecificAnnotationUpdates(newPod, oldPod *core.Pod, fldPath *field.Path) field.ErrorList {
func ValidatePodSpecificAnnotationUpdates(newPod, oldPod *core.Pod, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
newAnnotations := newPod.Annotations
oldAnnotations := oldPod.Annotations
@ -194,7 +200,7 @@ func ValidatePodSpecificAnnotationUpdates(newPod, oldPod *core.Pod, fldPath *fie
allErrs = append(allErrs, field.Forbidden(fldPath.Key(k), "may not add mirror pod annotation"))
}
}
allErrs = append(allErrs, ValidatePodSpecificAnnotations(newAnnotations, &newPod.Spec, fldPath)...)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(newAnnotations, &newPod.Spec, fldPath, opts)...)
return allErrs
}
@ -3185,6 +3191,8 @@ type PodValidationOptions struct {
AllowMultipleHugePageResources bool
// Allow pod spec to use hugepages in downward API
AllowDownwardAPIHugePages bool
// Allow invalid pod-deletion-cost annotation value for backward compatibility.
AllowInvalidPodDeletionCost bool
}
// ValidatePodSingleHugePageResources checks if there are multiple huge
@ -3209,7 +3217,7 @@ func ValidatePodSingleHugePageResources(pod *core.Pod, specPath *field.Path) fie
func validatePodMetadataAndSpec(pod *core.Pod, opts PodValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMeta(&pod.ObjectMeta, true, ValidatePodName, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(pod.ObjectMeta.Annotations, &pod.Spec, fldPath.Child("annotations"))...)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(pod.ObjectMeta.Annotations, &pod.Spec, fldPath.Child("annotations"), opts)...)
allErrs = append(allErrs, ValidatePodSpec(&pod.Spec, &pod.ObjectMeta, field.NewPath("spec"), opts)...)
// we do additional validation only pertinent for pods and not pod templates
@ -3928,7 +3936,7 @@ func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMetaUpdate(&newPod.ObjectMeta, &oldPod.ObjectMeta, fldPath)
allErrs = append(allErrs, validatePodMetadataAndSpec(newPod, opts)...)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"))...)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"), opts)...)
specPath := field.NewPath("spec")
if !opts.AllowMultipleHugePageResources {
@ -4035,10 +4043,10 @@ func ValidateContainerStateTransition(newStatuses, oldStatuses []core.ContainerS
// ValidatePodStatusUpdate tests to see if the update is legal for an end user to make. newPod is updated with fields
// that cannot be changed.
func ValidatePodStatusUpdate(newPod, oldPod *core.Pod) field.ErrorList {
func ValidatePodStatusUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMetaUpdate(&newPod.ObjectMeta, &oldPod.ObjectMeta, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"))...)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"), opts)...)
allErrs = append(allErrs, validatePodConditions(newPod.Status.Conditions, fldPath.Child("conditions"))...)
fldPath = field.NewPath("status")
@ -4573,7 +4581,7 @@ func ValidatePodTemplateSpec(spec *core.PodTemplateSpec, fldPath *field.Path, op
allErrs := field.ErrorList{}
allErrs = append(allErrs, unversionedvalidation.ValidateLabels(spec.Labels, fldPath.Child("labels"))...)
allErrs = append(allErrs, ValidateAnnotations(spec.Annotations, fldPath.Child("annotations"))...)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(spec.Annotations, &spec.Spec, fldPath.Child("annotations"))...)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(spec.Annotations, &spec.Spec, fldPath.Child("annotations"), opts)...)
allErrs = append(allErrs, ValidatePodSpec(&spec.Spec, nil, fldPath.Child("spec"), opts)...)
allErrs = append(allErrs, validateSeccompAnnotationsAndFields(spec.ObjectMeta, &spec.Spec, fldPath.Child("spec"))...)

View File

@ -7838,6 +7838,22 @@ func TestValidatePod(t *testing.T) {
DNSPolicy: core.DNSClusterFirst,
},
},
"negative pod-deletion-cost": {
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns", Annotations: map[string]string{core.PodDeletionCost: "-100"}},
Spec: core.PodSpec{
Containers: []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}},
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
},
"positive pod-deletion-cost": {
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns", Annotations: map[string]string{core.PodDeletionCost: "100"}},
Spec: core.PodSpec{
Containers: []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}},
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
},
}
for k, v := range successCases {
t.Run(k, func(t *testing.T) {
@ -8764,10 +8780,43 @@ func TestValidatePod(t *testing.T) {
},
},
},
"invalid pod-deletion-cost": {
expectedError: "metadata.annotations[controller.kubernetes.io/pod-deletion-cost]: Invalid value: \"text\": must be a 32bit integer",
spec: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns", Annotations: map[string]string{core.PodDeletionCost: "text"}},
Spec: core.PodSpec{
Containers: []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}},
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
},
},
"invalid leading zeros pod-deletion-cost": {
expectedError: "metadata.annotations[controller.kubernetes.io/pod-deletion-cost]: Invalid value: \"008\": must be a 32bit integer",
spec: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns", Annotations: map[string]string{core.PodDeletionCost: "008"}},
Spec: core.PodSpec{
Containers: []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}},
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
},
},
"invalid leading plus sign pod-deletion-cost": {
expectedError: "metadata.annotations[controller.kubernetes.io/pod-deletion-cost]: Invalid value: \"+10\": must be a 32bit integer",
spec: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns", Annotations: map[string]string{core.PodDeletionCost: "+10"}},
Spec: core.PodSpec{
Containers: []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}},
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
},
},
}
for k, v := range errorCases {
t.Run(k, func(t *testing.T) {
if errs := ValidatePodCreate(&v.spec, PodValidationOptions{}); len(errs) == 0 {
if errs := ValidatePodCreate(&v.spec, PodValidationOptions{AllowInvalidPodDeletionCost: false}); len(errs) == 0 {
t.Errorf("expected failure")
} else if v.expectedError == "" {
t.Errorf("missing expectedError, got %q", errs.ToAggregate().Error())
@ -9711,7 +9760,7 @@ func TestValidatePodStatusUpdate(t *testing.T) {
for _, test := range tests {
test.new.ObjectMeta.ResourceVersion = "1"
test.old.ObjectMeta.ResourceVersion = "1"
errs := ValidatePodStatusUpdate(&test.new, &test.old)
errs := ValidatePodStatusUpdate(&test.new, &test.old, PodValidationOptions{})
if test.err == "" {
if len(errs) != 0 {
t.Errorf("unexpected invalid: %s (%+v)\nA: %+v\nB: %+v", test.test, errs, test.new, test.old)
@ -16350,7 +16399,7 @@ func TestPodIPsValidation(t *testing.T) {
oldPod.ResourceVersion = "1"
oldPod.Name = newPod.Name
errs := ValidatePodStatusUpdate(newPod, oldPod)
errs := ValidatePodStatusUpdate(newPod, oldPod, PodValidationOptions{})
if oldTestCase.expectError {
// The old pod was invalid, tolerate invalid IPs in the new pod as well
if len(errs) > 0 {

View File

@ -39,13 +39,16 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
clientretry "k8s.io/client-go/util/retry"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/apis/core/helper"
_ "k8s.io/kubernetes/pkg/apis/core/install"
"k8s.io/kubernetes/pkg/apis/core/validation"
"k8s.io/kubernetes/pkg/features"
hashutil "k8s.io/kubernetes/pkg/util/hash"
taintutils "k8s.io/kubernetes/pkg/util/taints"
"k8s.io/utils/integer"
@ -791,15 +794,17 @@ func (s ActivePods) Less(i, j int) bool {
// is unknown, and a pod whose phase is unknown comes before a running pod.
// 3. If exactly one of the pods is ready, the pod that is not ready comes
// before the ready pod.
// 4. If the pods' ranks differ, the pod with greater rank comes before the pod
// 4. If controller.kubernetes.io/pod-deletion-cost annotation is set, then
// the pod with the lower value will come first.
// 5. If the pods' ranks differ, the pod with greater rank comes before the pod
// with lower rank.
// 5. If both pods are ready but have not been ready for the same amount of
// 6. If both pods are ready but have not been ready for the same amount of
// time, the pod that has been ready for a shorter amount of time comes
// before the pod that has been ready for longer.
// 6. If one pod has a container that has restarted more than any container in
// 7. If one pod has a container that has restarted more than any container in
// the other pod, the pod with the container with more restarts comes
// before the other pod.
// 7. If the pods' creation times differ, the pod that was created more recently
// 8. If the pods' creation times differ, the pod that was created more recently
// comes before the older pod.
//
// If none of these rules matches, the second pod comes before the first pod.
@ -842,7 +847,17 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) {
return !podutil.IsPodReady(s.Pods[i])
}
// 4. Doubled up < not doubled up
// 4. higher pod-deletion-cost < lower pod-deletion cost
if utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost) {
pi, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[i].Annotations)
pj, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[j].Annotations)
if pi != pj {
return pi < pj
}
}
// 5. Doubled up < not doubled up
// If one of the two pods is on the same node as one or more additional
// ready pods that belong to the same replicaset, whichever pod has more
// colocated ready pods is less
@ -851,7 +866,7 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
}
// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
// see https://github.com/kubernetes/kubernetes/issues/22065
// 5. Been ready for empty time < less time < more time
// 6. Been ready for empty time < less time < more time
// If both pods are ready, the latest ready one is smaller
if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) {
readyTime1 := podReadyTime(s.Pods[i])
@ -860,11 +875,11 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
return afterOrZero(readyTime1, readyTime2)
}
}
// 6. Pods with containers with higher restart counts < lower restart counts
// 7. Pods with containers with higher restart counts < lower restart counts
if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) {
return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j])
}
// 7. Empty creation time pods < newer pods < older pods
// 8. Empty creation time pods < newer pods < older pods
if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
}

View File

@ -38,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
clientscheme "k8s.io/client-go/kubernetes/scheme"
@ -45,8 +46,11 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
utiltesting "k8s.io/client-go/util/testing"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/apis/core"
_ "k8s.io/kubernetes/pkg/apis/core/install"
"k8s.io/kubernetes/pkg/controller/testutil"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/securitycontext"
"github.com/stretchr/testify/assert"
@ -435,7 +439,7 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
now := metav1.Now()
then := metav1.Time{Time: now.AddDate(0, -1, 0)}
zeroTime := metav1.Time{}
pod := func(podName, nodeName string, phase v1.PodPhase, ready bool, restarts int32, readySince metav1.Time, created metav1.Time) *v1.Pod {
pod := func(podName, nodeName string, phase v1.PodPhase, ready bool, restarts int32, readySince metav1.Time, created metav1.Time, annotations map[string]string) *v1.Pod {
var conditions []v1.PodCondition
var containerStatuses []v1.ContainerStatus
if ready {
@ -446,6 +450,7 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: created,
Name: podName,
Annotations: annotations,
},
Spec: v1.PodSpec{NodeName: nodeName},
Status: v1.PodStatus{
@ -456,15 +461,17 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
}
}
var (
unscheduledPod = pod("unscheduled", "", v1.PodPending, false, 0, zeroTime, zeroTime)
scheduledPendingPod = pod("pending", "node", v1.PodPending, false, 0, zeroTime, zeroTime)
unknownPhasePod = pod("unknown-phase", "node", v1.PodUnknown, false, 0, zeroTime, zeroTime)
runningNotReadyPod = pod("not-ready", "node", v1.PodRunning, false, 0, zeroTime, zeroTime)
runningReadyNoLastTransitionTimePod = pod("ready-no-last-transition-time", "node", v1.PodRunning, true, 0, zeroTime, zeroTime)
runningReadyNow = pod("ready-now", "node", v1.PodRunning, true, 0, now, now)
runningReadyThen = pod("ready-then", "node", v1.PodRunning, true, 0, then, then)
runningReadyNowHighRestarts = pod("ready-high-restarts", "node", v1.PodRunning, true, 9001, now, now)
runningReadyNowCreatedThen = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then)
unscheduledPod = pod("unscheduled", "", v1.PodPending, false, 0, zeroTime, zeroTime, nil)
scheduledPendingPod = pod("pending", "node", v1.PodPending, false, 0, zeroTime, zeroTime, nil)
unknownPhasePod = pod("unknown-phase", "node", v1.PodUnknown, false, 0, zeroTime, zeroTime, nil)
runningNotReadyPod = pod("not-ready", "node", v1.PodRunning, false, 0, zeroTime, zeroTime, nil)
runningReadyNoLastTransitionTimePod = pod("ready-no-last-transition-time", "node", v1.PodRunning, true, 0, zeroTime, zeroTime, nil)
runningReadyNow = pod("ready-now", "node", v1.PodRunning, true, 0, now, now, nil)
runningReadyThen = pod("ready-then", "node", v1.PodRunning, true, 0, then, then, nil)
runningReadyNowHighRestarts = pod("ready-high-restarts", "node", v1.PodRunning, true, 9001, now, now, nil)
runningReadyNowCreatedThen = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then, nil)
lowPodDeletionCost = pod("low-deletion-cost", "node", v1.PodRunning, true, 0, now, then, map[string]string{core.PodDeletionCost: "10"})
highPodDeletionCost = pod("high-deletion-cost", "node", v1.PodRunning, true, 0, now, then, map[string]string{core.PodDeletionCost: "100"})
)
equalityTests := []*v1.Pod{
unscheduledPod,
@ -491,33 +498,40 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
rank int
}
inequalityTests := []struct {
lesser, greater podWithRank
lesser, greater podWithRank
disablePodDeletioncost bool
}{
{podWithRank{unscheduledPod, 1}, podWithRank{scheduledPendingPod, 2}},
{podWithRank{unscheduledPod, 2}, podWithRank{scheduledPendingPod, 1}},
{podWithRank{scheduledPendingPod, 1}, podWithRank{unknownPhasePod, 2}},
{podWithRank{unknownPhasePod, 1}, podWithRank{runningNotReadyPod, 2}},
{podWithRank{runningNotReadyPod, 1}, podWithRank{runningReadyNoLastTransitionTimePod, 1}},
{podWithRank{runningReadyNoLastTransitionTimePod, 1}, podWithRank{runningReadyNow, 1}},
{podWithRank{runningReadyNow, 2}, podWithRank{runningReadyNoLastTransitionTimePod, 1}},
{podWithRank{runningReadyNow, 1}, podWithRank{runningReadyThen, 1}},
{podWithRank{runningReadyNow, 2}, podWithRank{runningReadyThen, 1}},
{podWithRank{runningReadyNowHighRestarts, 1}, podWithRank{runningReadyNow, 1}},
{podWithRank{runningReadyNow, 2}, podWithRank{runningReadyNowHighRestarts, 1}},
{podWithRank{runningReadyNow, 1}, podWithRank{runningReadyNowCreatedThen, 1}},
{podWithRank{runningReadyNowCreatedThen, 2}, podWithRank{runningReadyNow, 1}},
{lesser: podWithRank{unscheduledPod, 1}, greater: podWithRank{scheduledPendingPod, 2}},
{lesser: podWithRank{unscheduledPod, 2}, greater: podWithRank{scheduledPendingPod, 1}},
{lesser: podWithRank{scheduledPendingPod, 1}, greater: podWithRank{unknownPhasePod, 2}},
{lesser: podWithRank{unknownPhasePod, 1}, greater: podWithRank{runningNotReadyPod, 2}},
{lesser: podWithRank{runningNotReadyPod, 1}, greater: podWithRank{runningReadyNoLastTransitionTimePod, 1}},
{lesser: podWithRank{runningReadyNoLastTransitionTimePod, 1}, greater: podWithRank{runningReadyNow, 1}},
{lesser: podWithRank{runningReadyNow, 2}, greater: podWithRank{runningReadyNoLastTransitionTimePod, 1}},
{lesser: podWithRank{runningReadyNow, 1}, greater: podWithRank{runningReadyThen, 1}},
{lesser: podWithRank{runningReadyNow, 2}, greater: podWithRank{runningReadyThen, 1}},
{lesser: podWithRank{runningReadyNowHighRestarts, 1}, greater: podWithRank{runningReadyNow, 1}},
{lesser: podWithRank{runningReadyNow, 2}, greater: podWithRank{runningReadyNowHighRestarts, 1}},
{lesser: podWithRank{runningReadyNow, 1}, greater: podWithRank{runningReadyNowCreatedThen, 1}},
{lesser: podWithRank{runningReadyNowCreatedThen, 2}, greater: podWithRank{runningReadyNow, 1}},
{lesser: podWithRank{lowPodDeletionCost, 2}, greater: podWithRank{highPodDeletionCost, 1}},
{lesser: podWithRank{highPodDeletionCost, 2}, greater: podWithRank{lowPodDeletionCost, 1}, disablePodDeletioncost: true},
}
for _, test := range inequalityTests {
podsWithRanks := ActivePodsWithRanks{
Pods: []*v1.Pod{test.lesser.pod, test.greater.pod},
Rank: []int{test.lesser.rank, test.greater.rank},
}
if !podsWithRanks.Less(0, 1) {
t.Errorf("expected pod %q with rank %v to be less than %q with rank %v", podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0], podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1])
}
if podsWithRanks.Less(1, 0) {
t.Errorf("expected pod %q with rank %v not to be less than %v with rank %v", podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1], podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0])
}
for i, test := range inequalityTests {
t.Run(fmt.Sprintf("test%d", i), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDeletionCost, !test.disablePodDeletioncost)()
podsWithRanks := ActivePodsWithRanks{
Pods: []*v1.Pod{test.lesser.pod, test.greater.pod},
Rank: []int{test.lesser.rank, test.greater.rank},
}
if !podsWithRanks.Less(0, 1) {
t.Errorf("expected pod %q with rank %v to be less than %q with rank %v", podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0], podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1])
}
if podsWithRanks.Less(1, 0) {
t.Errorf("expected pod %q with rank %v not to be less than %v with rank %v", podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1], podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0])
}
})
}
}

View File

@ -663,6 +663,12 @@ const (
//
// Enables the usage of different protocols in the same Service with type=LoadBalancer
MixedProtocolLBService featuregate.Feature = "MixedProtocolLBService"
// owner: @ahg-g
// alpha: v1.21
//
// Enables controlling pod ranking on replicaset scale-down.
PodDeletionCost featuregate.Feature = "PodDeletionCost"
)
func init() {
@ -763,6 +769,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
MixedProtocolLBService: {Default: false, PreRelease: featuregate.Alpha},
PreferNominatedNode: {Default: false, PreRelease: featuregate.Alpha},
RunAsGroup: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.22
PodDeletionCost: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:

View File

@ -96,7 +96,8 @@ func (statefulSetStrategy) PrepareForUpdate(ctx context.Context, obj, old runtim
// Validate validates a new StatefulSet.
func (statefulSetStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList {
statefulSet := obj.(*apps.StatefulSet)
return validation.ValidateStatefulSet(statefulSet)
opts := pod.GetValidationOptionsFromPodTemplate(&statefulSet.Spec.Template, nil)
return validation.ValidateStatefulSet(statefulSet, opts)
}
// Canonicalize normalizes the object after validation.
@ -112,7 +113,9 @@ func (statefulSetStrategy) AllowCreateOnUpdate() bool {
func (statefulSetStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList {
newStatefulSet := obj.(*apps.StatefulSet)
oldStatefulSet := old.(*apps.StatefulSet)
validationErrorList := validation.ValidateStatefulSet(newStatefulSet)
opts := pod.GetValidationOptionsFromPodTemplate(&newStatefulSet.Spec.Template, &oldStatefulSet.Spec.Template)
validationErrorList := validation.ValidateStatefulSet(newStatefulSet, opts)
updateErrorList := validation.ValidateStatefulSetUpdate(newStatefulSet, oldStatefulSet)
return append(validationErrorList, updateErrorList...)
}

View File

@ -91,7 +91,7 @@ func (podStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object
// Validate validates a new pod.
func (podStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList {
pod := obj.(*api.Pod)
opts := podutil.GetValidationOptionsFromPodSpec(&pod.Spec, nil)
opts := podutil.GetValidationOptionsFromPodSpecAndMeta(&pod.Spec, nil, &pod.ObjectMeta, nil)
return validation.ValidatePodCreate(pod, opts)
}
@ -109,7 +109,7 @@ func (podStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object)
// Allow downward api usage of hugepages on pod update if feature is enabled or if the old pod already had used them.
pod := obj.(*api.Pod)
oldPod := old.(*api.Pod)
opts := podutil.GetValidationOptionsFromPodSpec(&pod.Spec, &oldPod.Spec)
opts := podutil.GetValidationOptionsFromPodSpecAndMeta(&pod.Spec, &oldPod.Spec, &pod.ObjectMeta, &oldPod.ObjectMeta)
return validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod), opts)
}
@ -167,7 +167,11 @@ func (podStatusStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.
}
func (podStatusStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList {
return validation.ValidatePodStatusUpdate(obj.(*api.Pod), old.(*api.Pod))
pod := obj.(*api.Pod)
oldPod := old.(*api.Pod)
opts := podutil.GetValidationOptionsFromPodSpecAndMeta(&pod.Spec, &oldPod.Spec, &pod.ObjectMeta, &oldPod.ObjectMeta)
return validation.ValidatePodStatusUpdate(obj.(*api.Pod), old.(*api.Pod), opts)
}
type podEphemeralContainersStrategy struct {
@ -180,7 +184,7 @@ var EphemeralContainersStrategy = podEphemeralContainersStrategy{Strategy}
func (podEphemeralContainersStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList {
newPod := obj.(*api.Pod)
oldPod := old.(*api.Pod)
opts := podutil.GetValidationOptionsFromPodSpec(&newPod.Spec, &oldPod.Spec)
opts := podutil.GetValidationOptionsFromPodSpecAndMeta(&newPod.Spec, &oldPod.Spec, &newPod.ObjectMeta, &oldPod.ObjectMeta)
return validation.ValidatePodEphemeralContainersUpdate(newPod, oldPod, opts)
}

View File

@ -128,4 +128,14 @@ const (
// This annotation is used by the Attach Detach Controller to determine whether to use the in-tree or
// CSI Backend for a volume plugin on a specific node.
MigratedPluginsAnnotationKey = "storage.alpha.kubernetes.io/migrated-plugins"
// PodDeletionCost can be used to set to an int32 that represent the cost of deleting
// a pod compared to other pods belonging to the same ReplicaSet. Pods with lower
// deletion cost are preferred to be deleted before pods with higher deletion cost.
// Note that this is honored on a best-effort basis, and so it does not offer guarantees on
// pod deletion order.
// The implicit deletion cost for pods that don't set the annotation is 0, negative values are permitted.
//
// This annotation is alpha-level and is only honored when PodDeletionCost feature is enabled.
PodDeletionCost = "controller.kubernetes.io/pod-deletion-cost"
)