Mirror of https://github.com/k3s-io/kubernetes.git

commit 49da505a9a (parent 8d4c49faae)

    EvenPodsSpread: api changes
@@ -401,6 +401,11 @@ func dropDisabledFields(
        // does not specify any values for these fields.
        podSpec.PreemptionPolicy = nil
    }

    if !utilfeature.DefaultFeatureGate.Enabled(features.EvenPodsSpread) && !topologySpreadConstraintsInUse(oldPodSpec) {
        // Set TopologySpreadConstraints to nil only if feature is disabled and it is not used
        podSpec.TopologySpreadConstraints = nil
    }
}

// dropDisabledRunAsGroupField removes disabled fields from PodSpec related
@@ -562,7 +567,14 @@ func overheadInUse(podSpec *api.PodSpec) bool {
        return true
    }
    return false
}

// topologySpreadConstraintsInUse returns true if the pod spec is non-nil and has a TopologySpreadConstraints slice
func topologySpreadConstraintsInUse(podSpec *api.PodSpec) bool {
    if podSpec == nil {
        return false
    }
    return len(podSpec.TopologySpreadConstraints) > 0
}

// procMountInUse returns true if the pod spec is non-nil and has a SecurityContext's ProcMount field set to a non-default value
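For context, the hunk above applies the usual alpha-field handling: the new TopologySpreadConstraints field is cleared when the EvenPodsSpread gate is off, unless the old object already used it, so existing data is never silently dropped on update. A minimal self-contained sketch of that pattern, using illustrative names rather than the real Kubernetes types and helpers:

package main

import "fmt"

// Spec stands in for a pod spec carrying an alpha field.
// The names here are illustrative, not the real Kubernetes types.
type Spec struct {
    TopologySpreadConstraints []string
}

// dropDisabledAlphaField clears the alpha field on the incoming spec when the
// feature gate is off, unless the old spec already used the field (which would
// make an update silently lose data).
func dropDisabledAlphaField(newSpec, oldSpec *Spec, gateEnabled bool) {
    inUse := oldSpec != nil && len(oldSpec.TopologySpreadConstraints) > 0
    if !gateEnabled && !inUse {
        newSpec.TopologySpreadConstraints = nil
    }
}

func main() {
    newSpec := &Spec{TopologySpreadConstraints: []string{"zone"}}
    dropDisabledAlphaField(newSpec, nil, false)            // gate off, field not previously used
    fmt.Println(newSpec.TopologySpreadConstraints == nil)  // true: field dropped
}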
@@ -2715,6 +2715,13 @@ type PodSpec struct {
    // If not specified, the default is true.
    // +optional
    EnableServiceLinks *bool
    // TopologySpreadConstraints describes how a group of pods ought to spread across topology
    // domains. Scheduler will schedule pods in a way which abides by the constraints.
    // This field is alpha-level and is only honored by clusters that enable the EvenPodsSpread
    // feature.
    // All topologySpreadConstraints are ANDed.
    // +optional
    TopologySpreadConstraints []TopologySpreadConstraint
}

// HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the
@@ -4834,3 +4841,64 @@ const (
    // DefaultHardPodAffinityWeight defines the weight of the implicit PreferredDuringScheduling affinity rule.
    DefaultHardPodAffinitySymmetricWeight int32 = 1
)

type UnsatisfiableConstraintAction string

const (
    // DoNotSchedule instructs the scheduler not to schedule the pod
    // when constraints are not satisfied.
    DoNotSchedule UnsatisfiableConstraintAction = "DoNotSchedule"
    // ScheduleAnyway instructs the scheduler to schedule the pod
    // even if constraints are not satisfied.
    ScheduleAnyway UnsatisfiableConstraintAction = "ScheduleAnyway"
)

// TopologySpreadConstraint specifies how to spread matching pods among the given topology.
type TopologySpreadConstraint struct {
    // MaxSkew describes the degree to which pods may be unevenly distributed.
    // It's the maximum permitted difference between the number of matching pods in
    // any two topology domains of a given topology type.
    // For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
    // labelSelector spread as 1/1/0:
    // +-------+-------+-------+
    // | zone1 | zone2 | zone3 |
    // +-------+-------+-------+
    // |   P   |   P   |       |
    // +-------+-------+-------+
    // - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1;
    // scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
    // violate MaxSkew(1).
    // - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
    // It's a required field. Default value is 1 and 0 is not allowed.
    MaxSkew int32
    // TopologyKey is the key of node labels. Nodes that have a label with this key
    // and identical values are considered to be in the same topology.
    // We consider each <key, value> as a "bucket", and try to put balanced number
    // of pods into each bucket.
    // It's a required field.
    TopologyKey string
    // WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
    // the spread constraint.
    // - DoNotSchedule (default) tells the scheduler not to schedule it
    // - ScheduleAnyway tells the scheduler to still schedule it
    // It's considered as "Unsatisfiable" if and only if placing incoming pod on any
    // topology violates "MaxSkew".
    // For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
    // labelSelector spread as 3/1/1:
    // +-------+-------+-------+
    // | zone1 | zone2 | zone3 |
    // +-------+-------+-------+
    // | P P P |   P   |   P   |
    // +-------+-------+-------+
    // If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
    // to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
    // MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
    // won't make it *more* imbalanced.
    // It's a required field.
    WhenUnsatisfiable UnsatisfiableConstraintAction
    // LabelSelector is used to find matching pods.
    // Pods that match this label selector are counted to determine the number of pods
    // in their corresponding topology domain.
    // +optional
    LabelSelector *metav1.LabelSelector
}
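The ActualSkew arithmetic in the MaxSkew and WhenUnsatisfiable comments above can be checked with a few lines of standalone Go. This is only a sketch that reproduces the doc comment's examples (the candidate domain's matching-pod count after placement minus the global minimum), not the scheduler implementation:

package main

import "fmt"

// actualSkew places one incoming pod into `domain`, then compares that domain's
// matching-pod count with the global minimum across all domains, mirroring the
// "ActualSkew" arithmetic in the doc comment above.
func actualSkew(counts map[string]int, domain string) int {
    after := map[string]int{}
    for k, v := range counts {
        after[k] = v
    }
    after[domain]++
    min := -1
    for _, v := range after {
        if min == -1 || v < min {
            min = v
        }
    }
    return after[domain] - min
}

func main() {
    // The 1/1/0 example above, with MaxSkew=1.
    counts := map[string]int{"zone1": 1, "zone2": 1, "zone3": 0}
    fmt.Println(actualSkew(counts, "zone3")) // 0: within MaxSkew(1), placement allowed
    fmt.Println(actualSkew(counts, "zone1")) // 2: violates MaxSkew(1)

    // The 3/1/1 example above.
    counts = map[string]int{"zone1": 3, "zone2": 1, "zone3": 1}
    fmt.Println(actualSkew(counts, "zone2")) // 1: allowed under DoNotSchedule
    fmt.Println(actualSkew(counts, "zone1")) // 3: not allowed
}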
@@ -3091,6 +3091,7 @@ func ValidatePodSpec(spec *core.PodSpec, fldPath *field.Path) field.ErrorList {
    allErrs = append(allErrs, validateAffinity(spec.Affinity, fldPath.Child("affinity"))...)
    allErrs = append(allErrs, validatePodDNSConfig(spec.DNSConfig, &spec.DNSPolicy, fldPath.Child("dnsConfig"))...)
    allErrs = append(allErrs, validateReadinessGates(spec.ReadinessGates, fldPath.Child("readinessGates"))...)
    allErrs = append(allErrs, validateTopologySpreadConstraints(spec.TopologySpreadConstraints, fldPath.Child("topologySpreadConstraints"))...)
    if len(spec.ServiceAccountName) > 0 {
        for _, msg := range ValidateServiceAccountName(spec.ServiceAccountName, false) {
            allErrs = append(allErrs, field.Invalid(fldPath.Child("serviceAccountName"), spec.ServiceAccountName, msg))
@@ -5561,3 +5562,78 @@ func ValidateProcMountType(fldPath *field.Path, procMountType core.ProcMountType
        return field.NotSupported(fldPath, procMountType, []string{string(core.DefaultProcMount), string(core.UnmaskedProcMount)})
    }
}

var (
    supportedScheduleActions = sets.NewString(string(core.DoNotSchedule), string(core.ScheduleAnyway))
)

type spreadConstraintPair struct {
    topologyKey       string
    whenUnsatisfiable core.UnsatisfiableConstraintAction
}

// validateTopologySpreadConstraints validates given TopologySpreadConstraints.
func validateTopologySpreadConstraints(constraints []core.TopologySpreadConstraint, fldPath *field.Path) field.ErrorList {
    allErrs := field.ErrorList{}

    var existingConstraintPairs []spreadConstraintPair
    for i, constraint := range constraints {
        subFldPath := fldPath.Index(i)
        if err := ValidateMaxSkew(subFldPath.Child("maxSkew"), constraint.MaxSkew); err != nil {
            allErrs = append(allErrs, err)
        }
        if err := ValidateTopologyKey(subFldPath.Child("topologyKey"), constraint.TopologyKey); err != nil {
            allErrs = append(allErrs, err)
        }
        if err := ValidateWhenUnsatisfiable(subFldPath.Child("whenUnsatisfiable"), constraint.WhenUnsatisfiable); err != nil {
            allErrs = append(allErrs, err)
        }
        // tuple {topologyKey, whenUnsatisfiable} denotes one kind of spread constraint
        pair := spreadConstraintPair{
            topologyKey:       constraint.TopologyKey,
            whenUnsatisfiable: constraint.WhenUnsatisfiable,
        }
        if err := ValidateSpreadConstraintPair(subFldPath.Child("{topologyKey, whenUnsatisfiable}"), pair, existingConstraintPairs); err != nil {
            allErrs = append(allErrs, err)
        } else {
            existingConstraintPairs = append(existingConstraintPairs, pair)
        }
    }

    return allErrs
}

// ValidateMaxSkew tests that the argument is a valid MaxSkew.
func ValidateMaxSkew(fldPath *field.Path, maxSkew int32) *field.Error {
    if maxSkew <= 0 {
        return field.Invalid(fldPath, maxSkew, isNotPositiveErrorMsg)
    }
    return nil
}

// ValidateTopologyKey tests that the argument is a valid TopologyKey.
func ValidateTopologyKey(fldPath *field.Path, topologyKey string) *field.Error {
    if len(topologyKey) == 0 {
        return field.Required(fldPath, "can not be empty")
    }
    return nil
}

// ValidateWhenUnsatisfiable tests that the argument is a valid UnsatisfiableConstraintAction.
func ValidateWhenUnsatisfiable(fldPath *field.Path, action core.UnsatisfiableConstraintAction) *field.Error {
    if !supportedScheduleActions.Has(string(action)) {
        return field.NotSupported(fldPath, action, supportedScheduleActions.List())
    }
    return nil
}

// ValidateSpreadConstraintPair tests whether `pair` already exists in `existingConstraintPairs`.
func ValidateSpreadConstraintPair(fldPath *field.Path, pair spreadConstraintPair, existingConstraintPairs []spreadConstraintPair) *field.Error {
    for _, existingPair := range existingConstraintPairs {
        if pair.topologyKey == existingPair.topologyKey &&
            pair.whenUnsatisfiable == existingPair.whenUnsatisfiable {
            return field.Duplicate(fldPath, pair)
        }
    }
    return nil
}
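Taken together, the validators above enforce four rules: MaxSkew must be positive, TopologyKey must be non-empty, WhenUnsatisfiable must be one of the two supported actions, and the {topologyKey, whenUnsatisfiable} pair must be unique across the slice. A condensed, self-contained restatement of those checks with plain Go types (illustrative only; the real code returns a field.ErrorList):

package main

import "fmt"

type constraint struct {
    MaxSkew           int32
    TopologyKey       string
    WhenUnsatisfiable string
}

var supportedActions = map[string]bool{"DoNotSchedule": true, "ScheduleAnyway": true}

// validate mirrors the checks in validateTopologySpreadConstraints, collecting
// plain strings instead of field.Errors.
func validate(constraints []constraint) []string {
    var errs []string
    type pair struct{ key, action string }
    seen := map[pair]bool{}
    for i, c := range constraints {
        if c.MaxSkew <= 0 {
            errs = append(errs, fmt.Sprintf("[%d].maxSkew: must be greater than zero", i))
        }
        if c.TopologyKey == "" {
            errs = append(errs, fmt.Sprintf("[%d].topologyKey: required", i))
        }
        if !supportedActions[c.WhenUnsatisfiable] {
            errs = append(errs, fmt.Sprintf("[%d].whenUnsatisfiable: unsupported %q", i, c.WhenUnsatisfiable))
        }
        p := pair{c.TopologyKey, c.WhenUnsatisfiable}
        if seen[p] {
            errs = append(errs, fmt.Sprintf("[%d]: duplicate {topologyKey, whenUnsatisfiable} pair", i))
        } else {
            seen[p] = true
        }
    }
    return errs
}

func main() {
    fmt.Println(validate([]constraint{
        {MaxSkew: 1, TopologyKey: "zone", WhenUnsatisfiable: "DoNotSchedule"},
        {MaxSkew: 2, TopologyKey: "zone", WhenUnsatisfiable: "DoNotSchedule"}, // duplicate pair
    }))
}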
@@ -13663,7 +13663,6 @@ func testDataSourceInSpec(name string, kind string, apiGroup string) *core.Persi
}

func TestAlphaVolumePVCDataSource(t *testing.T) {

    testCases := []struct {
        testName  string
        claimSpec core.PersistentVolumeClaimSpec
@@ -13704,7 +13703,104 @@ func TestAlphaVolumePVCDataSource(t *testing.T) {
            if errs := ValidatePersistentVolumeClaimSpec(&tc.claimSpec, field.NewPath("spec")); len(errs) != 0 {
                t.Errorf("expected success: %v", errs)
            }
        }
    }
}

func TestValidateTopologySpreadConstraints(t *testing.T) {
    defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.EvenPodsSpread, true)()
    testCases := []struct {
        name        string
        constraints []core.TopologySpreadConstraint
        errtype     field.ErrorType
        errfield    string
    }{
        {
            name: "all required fields ok",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
            },
        },
        {
            name: "missing MaxSkew",
            constraints: []core.TopologySpreadConstraint{
                {TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
            },
            errtype:  field.ErrorTypeInvalid,
            errfield: "maxSkew",
        },
        {
            name: "invalid MaxSkew",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 0, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
            },
            errtype:  field.ErrorTypeInvalid,
            errfield: "maxSkew",
        },
        {
            name: "missing TopologyKey",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, WhenUnsatisfiable: core.DoNotSchedule},
            },
            errtype:  field.ErrorTypeRequired,
            errfield: "topologyKey",
        },
        {
            name: "missing scheduling mode",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, TopologyKey: "k8s.io/zone"},
            },
            errtype:  field.ErrorTypeNotSupported,
            errfield: "whenUnsatisfiable",
        },
        {
            name: "unsupported scheduling mode",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.UnsatisfiableConstraintAction("N/A")},
            },
            errtype:  field.ErrorTypeNotSupported,
            errfield: "whenUnsatisfiable",
        },
        {
            name: "multiple constraints ok with all required fields",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
                {MaxSkew: 2, TopologyKey: "k8s.io/node", WhenUnsatisfiable: core.ScheduleAnyway},
            },
        },
        {
            name: "multiple constraints missing TopologyKey on partial ones",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
                {MaxSkew: 2, WhenUnsatisfiable: core.ScheduleAnyway},
            },
            errtype:  field.ErrorTypeRequired,
            errfield: "topologyKey",
        },
        {
            name: "duplicate constraints",
            constraints: []core.TopologySpreadConstraint{
                {MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
                {MaxSkew: 2, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
            },
            errtype:  field.ErrorTypeDuplicate,
            errfield: "{topologyKey, whenUnsatisfiable}",
        },
    }

    for i, tc := range testCases {
        errs := validateTopologySpreadConstraints(tc.constraints, field.NewPath("field"))

        if len(errs) > 0 && tc.errtype == "" {
            t.Errorf("[%d: %q] unexpected error(s): %v", i, tc.name, errs)
        } else if len(errs) == 0 && tc.errtype != "" {
            t.Errorf("[%d: %q] expected error type %v", i, tc.name, tc.errtype)
        } else if len(errs) >= 1 {
            if errs[0].Type != tc.errtype {
                t.Errorf("[%d: %q] expected error type %v, got %v", i, tc.name, tc.errtype, errs[0].Type)
            } else if !strings.HasSuffix(errs[0].Field, "."+tc.errfield) {
                t.Errorf("[%d: %q] expected error on field %q, got %q", i, tc.name, tc.errfield, errs[0].Field)
            }
        }
    }
}
@@ -462,6 +462,12 @@ const (
    //
    // Enables ipv6 dual stack
    IPv6DualStack featuregate.Feature = "IPv6DualStack"

    // owner: @Huang-Wei
    // alpha: v1.16
    //
    // Schedule pods evenly across available topology domains.
    EvenPodsSpread featuregate.Feature = "EvenPodsSpread"
)

func init() {
@@ -539,6 +545,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
    VolumePVCDataSource: {Default: false, PreRelease: featuregate.Alpha},
    PodOverhead:         {Default: false, PreRelease: featuregate.Alpha},
    IPv6DualStack:       {Default: false, PreRelease: featuregate.Alpha},
    EvenPodsSpread:      {Default: false, PreRelease: featuregate.Alpha},

    // inherited features from generic apiserver, relisted here to get a conflict if it is changed
    // unintentionally on either side:
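EvenPodsSpread is registered as Alpha and defaults to false, so the new API fields stay inert until the gate is switched on (the test hunk above enables it per-test with featuregatetesting.SetFeatureGateDuringTest; on running components this is typically done via the standard --feature-gates=EvenPodsSpread=true flag). A small sketch of the gate check used by the drop-fields hunk, with import paths assumed from the usual Kubernetes layout:

package example

import (
    utilfeature "k8s.io/apiserver/pkg/util/feature"

    "k8s.io/kubernetes/pkg/features"
)

// evenPodsSpreadEnabled is a hypothetical helper that wraps the same gate check
// used in dropDisabledFields above.
func evenPodsSpreadEnabled() bool {
    return utilfeature.DefaultFeatureGate.Enabled(features.EvenPodsSpread)
}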
@@ -3011,6 +3011,79 @@ type PodSpec struct {
    // This field is alpha-level as of Kubernetes v1.16, and is only honored by servers that enable the PodOverhead feature.
    // +optional
    Overhead ResourceList `json:"overhead,omitempty" protobuf:"bytes,32,opt,name=overhead"`
    // TopologySpreadConstraints describes how a group of pods ought to spread across topology
    // domains. Scheduler will schedule pods in a way which abides by the constraints.
    // This field is alpha-level and is only honored by clusters that enable the EvenPodsSpread
    // feature.
    // All topologySpreadConstraints are ANDed.
    // +optional
    // +patchMergeKey=topologyKey
    // +patchStrategy=merge
    // +listType=map
    // +listMapKey=topologyKey
    // +listMapKey=whenUnsatisfiable
    TopologySpreadConstraints []TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty" patchStrategy:"merge" patchMergeKey:"topologyKey" protobuf:"bytes,33,opt,name=topologySpreadConstraints"`
}

type UnsatisfiableConstraintAction string

const (
    // DoNotSchedule instructs the scheduler not to schedule the pod
    // when constraints are not satisfied.
    DoNotSchedule UnsatisfiableConstraintAction = "DoNotSchedule"
    // ScheduleAnyway instructs the scheduler to schedule the pod
    // even if constraints are not satisfied.
    ScheduleAnyway UnsatisfiableConstraintAction = "ScheduleAnyway"
)

// TopologySpreadConstraint specifies how to spread matching pods among the given topology.
type TopologySpreadConstraint struct {
    // MaxSkew describes the degree to which pods may be unevenly distributed.
    // It's the maximum permitted difference between the number of matching pods in
    // any two topology domains of a given topology type.
    // For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
    // labelSelector spread as 1/1/0:
    // +-------+-------+-------+
    // | zone1 | zone2 | zone3 |
    // +-------+-------+-------+
    // |   P   |   P   |       |
    // +-------+-------+-------+
    // - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1;
    // scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
    // violate MaxSkew(1).
    // - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
    // It's a required field. Default value is 1 and 0 is not allowed.
    MaxSkew int32 `json:"maxSkew" protobuf:"varint,1,opt,name=maxSkew"`
    // TopologyKey is the key of node labels. Nodes that have a label with this key
    // and identical values are considered to be in the same topology.
    // We consider each <key, value> as a "bucket", and try to put balanced number
    // of pods into each bucket.
    // It's a required field.
    TopologyKey string `json:"topologyKey" protobuf:"bytes,2,opt,name=topologyKey"`
    // WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
    // the spread constraint.
    // - DoNotSchedule (default) tells the scheduler not to schedule it
    // - ScheduleAnyway tells the scheduler to still schedule it
    // It's considered as "Unsatisfiable" if and only if placing incoming pod on any
    // topology violates "MaxSkew".
    // For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
    // labelSelector spread as 3/1/1:
    // +-------+-------+-------+
    // | zone1 | zone2 | zone3 |
    // +-------+-------+-------+
    // | P P P |   P   |   P   |
    // +-------+-------+-------+
    // If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
    // to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
    // MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
    // won't make it *more* imbalanced.
    // It's a required field.
    WhenUnsatisfiable UnsatisfiableConstraintAction `json:"whenUnsatisfiable" protobuf:"bytes,3,opt,name=whenUnsatisfiable,casttype=UnsatisfiableConstraintAction"`
    // LabelSelector is used to find matching pods.
    // Pods that match this label selector are counted to determine the number of pods
    // in their corresponding topology domain.
    // +optional
    LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty" protobuf:"bytes,4,opt,name=labelSelector"`
}

const (
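With the v1 types above in place, a client would set the new field on a PodSpec roughly as follows. This is a sketch against the public k8s.io/api and k8s.io/apimachinery packages; the field names and constants come from the hunk above, while the label key and selector are just example values:

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
    spec := v1.PodSpec{
        TopologySpreadConstraints: []v1.TopologySpreadConstraint{
            {
                MaxSkew:           1,
                TopologyKey:       "example.com/zone", // any node-label key; illustrative value
                WhenUnsatisfiable: v1.DoNotSchedule,
                LabelSelector: &metav1.LabelSelector{
                    MatchLabels: map[string]string{"app": "web"},
                },
            },
        },
    }
    fmt.Printf("%+v\n", spec.TopologySpreadConstraints[0])
}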