EvenPodsSpread: api changes

Wei Huang 2019-04-26 10:08:52 -07:00
parent 8d4c49faae
commit 49da505a9a
6 changed files with 333 additions and 1 deletion

pkg/api/pod/util.go

@@ -401,6 +401,11 @@ func dropDisabledFields(
// does not specify any values for these fields.
podSpec.PreemptionPolicy = nil
}
if !utilfeature.DefaultFeatureGate.Enabled(features.EvenPodsSpread) && !topologySpreadConstraintsInUse(oldPodSpec) {
// Set TopologySpreadConstraints to nil only if the feature is disabled and the field is not in use
podSpec.TopologySpreadConstraints = nil
}
}
// dropDisabledRunAsGroupField removes disabled fields from PodSpec related
@@ -562,7 +567,14 @@ func overheadInUse(podSpec *api.PodSpec) bool {
return true
}
return false
}
// topologySpreadConstraintsInUse returns true if the pod spec is non-nil and has a non-empty TopologySpreadConstraints slice
func topologySpreadConstraintsInUse(podSpec *api.PodSpec) bool {
if podSpec == nil {
return false
}
return len(podSpec.TopologySpreadConstraints) > 0
}
// procMountInUse returns true if the pod spec is non-nil and has a SecurityContext's ProcMount field set to a non-default value
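
The hunks above follow the usual drop-disabled-fields pattern: on create or update, an alpha field is cleared unless the gate is on or the old object already used it. The exact dropDisabledFields signature is truncated in the hunk header, so the caller below is only a sketch; it assumes the common (podSpec, podAnnotations, oldPodSpec, oldPodAnnotations) shape, and prepareForUpdate is a made-up name:

// Sketch only: prepareForUpdate is hypothetical; dropDisabledFields is the
// function patched above, assumed to take (podSpec, podAnnotations,
// oldPodSpec, oldPodAnnotations).
func prepareForUpdate(newPod, oldPod *api.Pod) {
	var oldSpec *api.PodSpec
	if oldPod != nil {
		oldSpec = &oldPod.Spec
	}
	// With EvenPodsSpread disabled and no prior use of the field, the new
	// pod's TopologySpreadConstraints is cleared; otherwise it round-trips.
	dropDisabledFields(&newPod.Spec, nil, oldSpec, nil)
}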

pkg/apis/core/types.go

@@ -2715,6 +2715,13 @@ type PodSpec struct {
// If not specified, the default is true.
// +optional
EnableServiceLinks *bool
// TopologySpreadConstraints describes how a group of pods ought to spread across topology
// domains. The scheduler will schedule pods in a way that abides by the constraints.
// This field is alpha-level and is only honored by clusters that enable the EvenPodsSpread
// feature.
// All topologySpreadConstraints are ANDed.
// +optional
TopologySpreadConstraints []TopologySpreadConstraint
}
// HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the
@@ -4834,3 +4841,64 @@ const (
// DefaultHardPodAffinitySymmetricWeight defines the weight of the implicit PreferredDuringScheduling affinity rule.
DefaultHardPodAffinitySymmetricWeight int32 = 1
)
type UnsatisfiableConstraintAction string
const (
// DoNotSchedule instructs the scheduler not to schedule the pod
// when constraints are not satisfied.
DoNotSchedule UnsatisfiableConstraintAction = "DoNotSchedule"
// ScheduleAnyway instructs the scheduler to schedule the pod
// even if constraints are not satisfied.
ScheduleAnyway UnsatisfiableConstraintAction = "ScheduleAnyway"
)
// TopologySpreadConstraint specifies how to spread matching pods among the given topology.
type TopologySpreadConstraint struct {
// MaxSkew describes the degree to which pods may be unevenly distributed.
// It's the maximum permitted difference between the number of matching pods in
// any two topology domains of a given topology type.
// For example, in a 3-zone cluster, if MaxSkew is set to 1, and pods with the same
// labelSelector are spread as 1/1/0:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P | P | |
// +-------+-------+-------+
// - if MaxSkew is 1, the incoming pod can only be scheduled to zone3 to become 1/1/1;
// scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
// violate MaxSkew(1).
// - if MaxSkew is 2, the incoming pod can be scheduled onto any zone.
// It's a required field. The default value is 1 and 0 is not allowed.
MaxSkew int32
// TopologyKey is the key of node labels. Nodes that have a label with this key
// and identical values are considered to be in the same topology.
// We consider each <key, value> as a "bucket", and try to put a balanced number
// of pods into each bucket.
// It's a required field.
TopologyKey string
// WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
// the spread constraint.
// - DoNotSchedule (default) tells the scheduler not to schedule it
// - ScheduleAnyway tells the scheduler to still schedule it
// A constraint is considered "unsatisfiable" if and only if placing the incoming
// pod on any topology domain would violate "MaxSkew".
// For example, in a 3-zone cluster, if MaxSkew is set to 1, and pods with the same
// labelSelector are spread as 3/1/1:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P P P | P | P |
// +-------+-------+-------+
// If WhenUnsatisfiable is set to DoNotSchedule, the incoming pod can only be scheduled
// to zone2(zone3) to become 3/2/1(3/1/2), as ActualSkew(2-1) on zone2(zone3) satisfies
// MaxSkew(1). In other words, the cluster can still be imbalanced, but the scheduler
// won't make it *more* imbalanced.
// It's a required field.
WhenUnsatisfiable UnsatisfiableConstraintAction
// LabelSelector is used to find matching pods.
// Pods that match this label selector are counted to determine the number of pods
// in their corresponding topology domain.
// +optional
LabelSelector *metav1.LabelSelector
}
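
The 1/1/0 walkthrough in the MaxSkew comment can be checked mechanically. The program below is a self-contained sketch, not scheduler code: it computes the max-minus-min skew that would result from placing one more matching pod into a candidate domain.

package main

import "fmt"

// skewAfterPlacing returns max(count) - min(count) across all domains after
// hypothetically placing one more matching pod into the named domain.
func skewAfterPlacing(counts map[string]int, domain string) int {
	after := make(map[string]int, len(counts))
	for d, c := range counts {
		after[d] = c
	}
	after[domain]++
	min, max := -1, 0
	for _, c := range after {
		if min == -1 || c < min {
			min = c
		}
		if c > max {
			max = c
		}
	}
	return max - min
}

func main() {
	// The comment's example: matching pods spread 1/1/0 across three zones.
	counts := map[string]int{"zone1": 1, "zone2": 1, "zone3": 0}
	for _, z := range []string{"zone1", "zone2", "zone3"} {
		// Prints skew 2 for zone1 and zone2, skew 0 for zone3: with
		// MaxSkew=1 only zone3 is a valid placement; with MaxSkew=2 all are.
		fmt.Printf("placing in %s -> skew %d\n", z, skewAfterPlacing(counts, z))
	}
}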

pkg/apis/core/validation/validation.go

@@ -3091,6 +3091,7 @@ func ValidatePodSpec(spec *core.PodSpec, fldPath *field.Path) field.ErrorList {
allErrs = append(allErrs, validateAffinity(spec.Affinity, fldPath.Child("affinity"))...)
allErrs = append(allErrs, validatePodDNSConfig(spec.DNSConfig, &spec.DNSPolicy, fldPath.Child("dnsConfig"))...)
allErrs = append(allErrs, validateReadinessGates(spec.ReadinessGates, fldPath.Child("readinessGates"))...)
allErrs = append(allErrs, validateTopologySpreadConstraints(spec.TopologySpreadConstraints, fldPath.Child("topologySpreadConstraints"))...)
if len(spec.ServiceAccountName) > 0 {
for _, msg := range ValidateServiceAccountName(spec.ServiceAccountName, false) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("serviceAccountName"), spec.ServiceAccountName, msg))
@@ -5561,3 +5562,78 @@ func ValidateProcMountType(fldPath *field.Path, procMountType core.ProcMountType
return field.NotSupported(fldPath, procMountType, []string{string(core.DefaultProcMount), string(core.UnmaskedProcMount)})
}
}
var (
supportedScheduleActions = sets.NewString(string(core.DoNotSchedule), string(core.ScheduleAnyway))
)
type spreadConstraintPair struct {
topologyKey string
whenUnsatisfiable core.UnsatisfiableConstraintAction
}
// validateTopologySpreadConstraints validates given TopologySpreadConstraints.
func validateTopologySpreadConstraints(constraints []core.TopologySpreadConstraint, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
var existingConstraintPairs []spreadConstraintPair
for i, constraint := range constraints {
subFldPath := fldPath.Index(i)
if err := ValidateMaxSkew(subFldPath.Child("maxSkew"), constraint.MaxSkew); err != nil {
allErrs = append(allErrs, err)
}
if err := ValidateTopologyKey(subFldPath.Child("topologyKey"), constraint.TopologyKey); err != nil {
allErrs = append(allErrs, err)
}
if err := ValidateWhenUnsatisfiable(subFldPath.Child("whenUnsatisfiable"), constraint.WhenUnsatisfiable); err != nil {
allErrs = append(allErrs, err)
}
// tuple {topologyKey, whenUnsatisfiable} denotes one kind of spread constraint
pair := spreadConstraintPair{
topologyKey: constraint.TopologyKey,
whenUnsatisfiable: constraint.WhenUnsatisfiable,
}
if err := ValidateSpreadConstraintPair(subFldPath.Child("{topologyKey, whenUnsatisfiable}"), pair, existingConstraintPairs); err != nil {
allErrs = append(allErrs, err)
} else {
existingConstraintPairs = append(existingConstraintPairs, pair)
}
}
return allErrs
}
// ValidateMaxSkew tests that the argument is a valid MaxSkew.
func ValidateMaxSkew(fldPath *field.Path, maxSkew int32) *field.Error {
if maxSkew <= 0 {
return field.Invalid(fldPath, maxSkew, isNotPositiveErrorMsg)
}
return nil
}
// ValidateTopologyKey tests that the argument is a valid TopologyKey.
func ValidateTopologyKey(fldPath *field.Path, topologyKey string) *field.Error {
if len(topologyKey) == 0 {
return field.Required(fldPath, "cannot be empty")
}
return nil
}
// ValidateWhenUnsatisfiable tests that the argument is a valid UnsatisfiableConstraintAction.
func ValidateWhenUnsatisfiable(fldPath *field.Path, action core.UnsatisfiableConstraintAction) *field.Error {
if !supportedScheduleActions.Has(string(action)) {
return field.NotSupported(fldPath, action, supportedScheduleActions.List())
}
return nil
}
// ValidateSpreadConstraintPair tests whether `pair` already exists in `existingConstraintPairs`.
func ValidateSpreadConstraintPair(fldPath *field.Path, pair spreadConstraintPair, existingConstraintPairs []spreadConstraintPair) *field.Error {
for _, existingPair := range existingConstraintPairs {
if pair.topologyKey == existingPair.topologyKey &&
pair.whenUnsatisfiable == existingPair.whenUnsatisfiable {
return field.Duplicate(fldPath, pair)
}
}
return nil
}
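
Since the helpers above are exported, they can also be exercised directly, outside validateTopologySpreadConstraints. A minimal sketch; the constraint literal and field path are illustrative:

// Sketch: direct use of the exported validators above.
c := core.TopologySpreadConstraint{
	MaxSkew:           1,
	TopologyKey:       "k8s.io/zone", // same hypothetical key the tests use
	WhenUnsatisfiable: core.DoNotSchedule,
}
fldPath := field.NewPath("spec", "topologySpreadConstraints").Index(0)
if err := ValidateMaxSkew(fldPath.Child("maxSkew"), c.MaxSkew); err != nil {
	// a maxSkew <= 0 lands here as a field.Invalid error
}
if err := ValidateWhenUnsatisfiable(fldPath.Child("whenUnsatisfiable"), c.WhenUnsatisfiable); err != nil {
	// an unknown action lands here as a field.NotSupported error
}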

pkg/apis/core/validation/validation_test.go

@@ -13663,7 +13663,6 @@ func testDataSourceInSpec(name string, kind string, apiGroup string) *core.Persi
}
func TestAlphaVolumePVCDataSource(t *testing.T) {
testCases := []struct {
testName string
claimSpec core.PersistentVolumeClaimSpec
@@ -13704,7 +13703,104 @@ func TestAlphaVolumePVCDataSource(t *testing.T) {
if errs := ValidatePersistentVolumeClaimSpec(&tc.claimSpec, field.NewPath("spec")); len(errs) != 0 {
t.Errorf("expected success: %v", errs)
}
}
}
}
func TestValidateTopologySpreadConstraints(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.EvenPodsSpread, true)()
testCases := []struct {
name string
constraints []core.TopologySpreadConstraint
errtype field.ErrorType
errfield string
}{
{
name: "all required fields ok",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
},
{
name: "missing MaxSkew",
constraints: []core.TopologySpreadConstraint{
{TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeInvalid,
errfield: "maxSkew",
},
{
name: "invalid MaxSkew",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 0, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeInvalid,
errfield: "maxSkew",
},
{
name: "missing TopologyKey",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeRequired,
errfield: "topologyKey",
},
{
name: "missing scheduling mode",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone"},
},
errtype: field.ErrorTypeNotSupported,
errfield: "whenUnsatisfiable",
},
{
name: "unsupported scheduling mode",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.UnsatisfiableConstraintAction("N/A")},
},
errtype: field.ErrorTypeNotSupported,
errfield: "whenUnsatisfiable",
},
{
name: "multiple constraints ok with all required fields",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: 2, TopologyKey: "k8s.io/node", WhenUnsatisfiable: core.ScheduleAnyway},
},
},
{
name: "multiple constraints missing TopologyKey on partial ones",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: 2, WhenUnsatisfiable: core.ScheduleAnyway},
},
errtype: field.ErrorTypeRequired,
errfield: "topologyKey",
},
{
name: "duplicate constraints",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: 2, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeDuplicate,
errfield: "{topologyKey, whenUnsatisfiable}",
},
}
for i, tc := range testCases {
errs := validateTopologySpreadConstraints(tc.constraints, field.NewPath("field"))
if len(errs) > 0 && tc.errtype == "" {
t.Errorf("[%d: %q] unexpected error(s): %v", i, tc.name, errs)
} else if len(errs) == 0 && tc.errtype != "" {
t.Errorf("[%d: %q] expected error type %v", i, tc.name, tc.errtype)
} else if len(errs) >= 1 {
if errs[0].Type != tc.errtype {
t.Errorf("[%d: %q] expected error type %v, got %v", i, tc.name, tc.errtype, errs[0].Type)
} else if !strings.HasSuffix(errs[0].Field, "."+tc.errfield) {
t.Errorf("[%d: %q] expected error on field %q, got %q", i, tc.name, tc.errfield, errs[0].Field)
}
}
}
}

pkg/features/kube_features.go

@@ -462,6 +462,12 @@ const (
//
// Enables ipv6 dual stack
IPv6DualStack featuregate.Feature = "IPv6DualStack"
// owner: @Huang-Wei
// alpha: v1.16
//
// Schedule pods evenly across available topology domains.
EvenPodsSpread featuregate.Feature = "EvenPodsSpread"
)
func init() {
@@ -539,6 +545,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
VolumePVCDataSource: {Default: false, PreRelease: featuregate.Alpha},
PodOverhead: {Default: false, PreRelease: featuregate.Alpha},
IPv6DualStack: {Default: false, PreRelease: featuregate.Alpha},
EvenPodsSpread: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:
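
Code that consumes the new field is expected to guard on this gate, mirroring the util.go hunk above; a minimal sketch of the check:

if utilfeature.DefaultFeatureGate.Enabled(features.EvenPodsSpread) {
	// honor spec.TopologySpreadConstraints
}

On a live cluster the alpha gate is flipped with --feature-gates=EvenPodsSpread=true on the relevant components.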

staging/src/k8s.io/api/core/v1/types.go

@@ -3011,6 +3011,79 @@ type PodSpec struct {
// This field is alpha-level as of Kubernetes v1.16, and is only honored by servers that enable the PodOverhead feature.
// +optional
Overhead ResourceList `json:"overhead,omitempty" protobuf:"bytes,32,opt,name=overhead"`
// TopologySpreadConstraints describes how a group of pods ought to spread across topology
// domains. The scheduler will schedule pods in a way that abides by the constraints.
// This field is alpha-level and is only honored by clusters that enable the EvenPodsSpread
// feature.
// All topologySpreadConstraints are ANDed.
// +optional
// +patchMergeKey=topologyKey
// +patchStrategy=merge
// +listType=map
// +listMapKey=topologyKey
// +listMapKey=whenUnsatisfiable
TopologySpreadConstraints []TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty" patchStrategy:"merge" patchMergeKey:"topologyKey" protobuf:"bytes,33,opt,name=topologySpreadConstraints"`
}
type UnsatisfiableConstraintAction string
const (
// DoNotSchedule instructs the scheduler not to schedule the pod
// when constraints are not satisfied.
DoNotSchedule UnsatisfiableConstraintAction = "DoNotSchedule"
// ScheduleAnyway instructs the scheduler to schedule the pod
// even if constraints are not satisfied.
ScheduleAnyway UnsatisfiableConstraintAction = "ScheduleAnyway"
)
// TopologySpreadConstraint specifies how to spread matching pods among the given topology.
type TopologySpreadConstraint struct {
// MaxSkew describes the degree to which pods may be unevenly distributed.
// It's the maximum permitted difference between the number of matching pods in
// any two topology domains of a given topology type.
// For example, in a 3-zone cluster, if MaxSkew is set to 1, and pods with the same
// labelSelector are spread as 1/1/0:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P | P | |
// +-------+-------+-------+
// - if MaxSkew is 1, the incoming pod can only be scheduled to zone3 to become 1/1/1;
// scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
// violate MaxSkew(1).
// - if MaxSkew is 2, the incoming pod can be scheduled onto any zone.
// It's a required field. The default value is 1 and 0 is not allowed.
MaxSkew int32 `json:"maxSkew" protobuf:"varint,1,opt,name=maxSkew"`
// TopologyKey is the key of node labels. Nodes that have a label with this key
// and identical values are considered to be in the same topology.
// We consider each <key, value> as a "bucket", and try to put a balanced number
// of pods into each bucket.
// It's a required field.
TopologyKey string `json:"topologyKey" protobuf:"bytes,2,opt,name=topologyKey"`
// WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
// the spread constraint.
// - DoNotSchedule (default) tells the scheduler not to schedule it
// - ScheduleAnyway tells the scheduler to still schedule it
// A constraint is considered "unsatisfiable" if and only if placing the incoming
// pod on any topology domain would violate "MaxSkew".
// For example, in a 3-zone cluster, if MaxSkew is set to 1, and pods with the same
// labelSelector are spread as 3/1/1:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P P P | P | P |
// +-------+-------+-------+
// If WhenUnsatisfiable is set to DoNotSchedule, the incoming pod can only be scheduled
// to zone2(zone3) to become 3/2/1(3/1/2), as ActualSkew(2-1) on zone2(zone3) satisfies
// MaxSkew(1). In other words, the cluster can still be imbalanced, but the scheduler
// won't make it *more* imbalanced.
// It's a required field.
WhenUnsatisfiable UnsatisfiableConstraintAction `json:"whenUnsatisfiable" protobuf:"bytes,3,opt,name=whenUnsatisfiable,casttype=UnsatisfiableConstraintAction"`
// LabelSelector is used to find matching pods.
// Pods that match this label selector are counted to determine the number of pods
// in their corresponding topology domain.
// +optional
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty" protobuf:"bytes,4,opt,name=labelSelector"`
}
const (
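
With the external v1 types in place, a client can populate the field directly. A hedged sketch against the k8s.io/api and k8s.io/apimachinery packages; the label key and selector are made up for illustration, and a server without EvenPodsSpread enabled will drop the field per the util.go change above:

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

spec := corev1.PodSpec{
	TopologySpreadConstraints: []corev1.TopologySpreadConstraint{{
		MaxSkew:           1,
		TopologyKey:       "k8s.io/zone", // hypothetical node label key
		WhenUnsatisfiable: corev1.DoNotSchedule,
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: map[string]string{"app": "web"}, // made-up selector
		},
	}},
}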