diff --git a/pkg/apis/node/types.go b/pkg/apis/node/types.go index 7afeb578b33..99878880c7e 100644 --- a/pkg/apis/node/types.go +++ b/pkg/apis/node/types.go @@ -54,6 +54,13 @@ type RuntimeClass struct { // that enable the PodOverhead feature. // +optional Overhead *Overhead + + // Scheduling holds the scheduling constraints to ensure that pods running + // with this RuntimeClass are scheduled to nodes that support it. + // If scheduling is nil, this RuntimeClass is assumed to be supported by all + // nodes. + // +optional + Scheduling *Scheduling } // Overhead structure represents the resource overhead associated with running a pod. @@ -63,6 +70,24 @@ type Overhead struct { PodFixed core.ResourceList } +// Scheduling specifies the scheduling constraints for nodes supporting a +// RuntimeClass. +type Scheduling struct { + // nodeSelector lists labels that must be present on nodes that support this + // RuntimeClass. Pods using this RuntimeClass can only be scheduled to a + // node matched by this selector. The RuntimeClass nodeSelector is merged + // with a pod's existing nodeSelector. Any conflicts will cause the pod to + // be rejected in admission. + // +optional + NodeSelector map[string]string + + // tolerations are appended (excluding duplicates) to pods running with this + // RuntimeClass during admission, effectively unioning the set of nodes + // tolerated by the pod and the RuntimeClass. + // +optional + Tolerations []core.Toleration +} + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // RuntimeClassList is a list of RuntimeClass objects. 
diff --git a/pkg/apis/node/v1alpha1/conversion.go b/pkg/apis/node/v1alpha1/conversion.go index aa2add91c38..013d2a837bd 100644 --- a/pkg/apis/node/v1alpha1/conversion.go +++ b/pkg/apis/node/v1alpha1/conversion.go @@ -30,26 +30,44 @@ func addConversionFuncs(s *runtime.Scheme) error { ) } +// Convert_v1alpha1_RuntimeClass_To_node_RuntimeClass must override the automatic +// conversion since we unnested the spec struct after v1alpha1 func Convert_v1alpha1_RuntimeClass_To_node_RuntimeClass(in *v1alpha1.RuntimeClass, out *node.RuntimeClass, s conversion.Scope) error { out.ObjectMeta = in.ObjectMeta out.Handler = in.Spec.RuntimeHandler + if in.Spec.Overhead != nil { out.Overhead = &node.Overhead{} if err := Convert_v1alpha1_Overhead_To_node_Overhead(in.Spec.Overhead, out.Overhead, s); err != nil { return err } } + if in.Spec.Scheduling != nil { + out.Scheduling = &node.Scheduling{} + if err := Convert_v1alpha1_Scheduling_To_node_Scheduling(in.Spec.Scheduling, out.Scheduling, s); err != nil { + return err + } + } return nil } +// Convert_node_RuntimeClass_To_v1alpha1_RuntimeClass must override the automatic +// conversion since we unnested the spec struct after v1alpha1 func Convert_node_RuntimeClass_To_v1alpha1_RuntimeClass(in *node.RuntimeClass, out *v1alpha1.RuntimeClass, s conversion.Scope) error { out.ObjectMeta = in.ObjectMeta out.Spec.RuntimeHandler = in.Handler + if in.Overhead != nil { out.Spec.Overhead = &v1alpha1.Overhead{} if err := Convert_node_Overhead_To_v1alpha1_Overhead(in.Overhead, out.Spec.Overhead, s); err != nil { return err } } + if in.Scheduling != nil { + out.Spec.Scheduling = &v1alpha1.Scheduling{} + if err := Convert_node_Scheduling_To_v1alpha1_Scheduling(in.Scheduling, out.Spec.Scheduling, s); err != nil { + return err + } + } return nil } diff --git a/pkg/apis/node/v1alpha1/conversion_test.go b/pkg/apis/node/v1alpha1/conversion_test.go index ffea32ed185..4174bd7723f 100644 --- a/pkg/apis/node/v1alpha1/conversion_test.go +++ 
b/pkg/apis/node/v1alpha1/conversion_test.go @@ -48,6 +48,14 @@ func TestRuntimeClassConversion(t *testing.T) { core.ResourceCPU: resource.MustParse(cpuOverhead), }, }, + Scheduling: &node.Scheduling{ + NodeSelector: map[string]string{"extra-soft": "true"}, + Tolerations: []core.Toleration{{ + Key: "stinky", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoSchedule, + }}, + }, }, external: &v1alpha1.RuntimeClass{ ObjectMeta: metav1.ObjectMeta{Name: name}, @@ -58,6 +66,14 @@ func TestRuntimeClassConversion(t *testing.T) { corev1.ResourceCPU: resource.MustParse(cpuOverhead), }, }, + Scheduling: &v1alpha1.Scheduling{ + NodeSelector: map[string]string{"extra-soft": "true"}, + Tolerations: []corev1.Toleration{{ + Key: "stinky", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }}, + }, }, }, }, @@ -75,6 +91,20 @@ func TestRuntimeClassConversion(t *testing.T) { }, }, }, + "empty-scheduling": { + internal: &node.RuntimeClass{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Handler: handler, + Scheduling: &node.Scheduling{}, + }, + external: &v1alpha1.RuntimeClass{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: v1alpha1.RuntimeClassSpec{ + RuntimeHandler: handler, + Scheduling: &v1alpha1.Scheduling{}, + }, + }, + }, "empty": { internal: &node.RuntimeClass{ ObjectMeta: metav1.ObjectMeta{Name: name}, diff --git a/pkg/apis/node/validation/validation.go b/pkg/apis/node/validation/validation.go index 64131ae9dbd..3f9ce994f4c 100644 --- a/pkg/apis/node/validation/validation.go +++ b/pkg/apis/node/validation/validation.go @@ -18,6 +18,7 @@ package validation import ( apivalidation "k8s.io/apimachinery/pkg/api/validation" + unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/kubernetes/pkg/apis/core" corevalidation "k8s.io/kubernetes/pkg/apis/core/validation" @@ -35,6 +36,9 @@ func ValidateRuntimeClass(rc *node.RuntimeClass) field.ErrorList { if 
rc.Overhead != nil { allErrs = append(allErrs, validateOverhead(rc.Overhead, field.NewPath("overhead"))...) } + if rc.Scheduling != nil { + allErrs = append(allErrs, validateScheduling(rc.Scheduling, field.NewPath("scheduling"))...) + } return allErrs } @@ -52,3 +56,38 @@ func validateOverhead(overhead *node.Overhead, fldPath *field.Path) field.ErrorL // reuse the ResourceRequirements validation logic return corevalidation.ValidateResourceRequirements(&core.ResourceRequirements{Limits: overhead.PodFixed}, fldPath) } + +// validateScheduling validates the nodeSelector labels (when set) and the +// tolerations of a RuntimeClass's scheduling constraints, reporting errors under fldPath. +func validateScheduling(s *node.Scheduling, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + if s.NodeSelector != nil { + allErrs = append(allErrs, unversionedvalidation.ValidateLabels(s.NodeSelector, fldPath.Child("nodeSelector"))...) + } + allErrs = append(allErrs, validateTolerations(s.Tolerations, fldPath.Child("tolerations"))...) + return allErrs +} + +// validateTolerations runs the core toleration validation and additionally +// rejects entries that repeat an earlier key, operator, value and effect. +// fldPath must already address the tolerations list, so it is passed through unchanged. +func validateTolerations(tolerations []core.Toleration, fldPath *field.Path) field.ErrorList { + allErrs := corevalidation.ValidateTolerations(tolerations, fldPath) + // Ensure uniqueness of tolerations. + tolerationSet := map[core.Toleration]bool{} + for i, t := range tolerations { + // listKey includes the toleration fields identified as listKeys in the API. 
+ listKey := core.Toleration{ + Key: t.Key, + Operator: t.Operator, + Value: t.Value, + Effect: t.Effect, + } + if tolerationSet[listKey] { + allErrs = append(allErrs, field.Duplicate(fldPath.Index(i), t)) + } else { + tolerationSet[listKey] = true + } + } + return allErrs +} diff --git a/pkg/apis/node/validation/validation_test.go b/pkg/apis/node/validation/validation_test.go index 188572d5041..9aef2f327b3 100644 --- a/pkg/apis/node/validation/validation_test.go +++ b/pkg/apis/node/validation/validation_test.go @@ -26,6 +26,7 @@ import ( "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/node" "k8s.io/kubernetes/pkg/features" + utilpointer "k8s.io/utils/pointer" "github.com/stretchr/testify/assert" ) @@ -186,3 +187,86 @@ func TestValidateOverhead(t *testing.T) { } } } + +func TestValidateScheduling(t *testing.T) { + tests := []struct { + name string + scheduling *node.Scheduling + expectErrs int + }{{ + name: "valid scheduling", + scheduling: &node.Scheduling{ + NodeSelector: map[string]string{"valid": "yes"}, + Tolerations: []core.Toleration{{ + Key: "valid", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoSchedule, + }}, + }, + }, { + name: "empty scheduling", + scheduling: &node.Scheduling{}, + }, { + name: "invalid nodeSelector", + scheduling: &node.Scheduling{ + NodeSelector: map[string]string{"not a valid key!!!": "nope"}, + }, + expectErrs: 1, + }, { + name: "invalid toleration", + scheduling: &node.Scheduling{ + Tolerations: []core.Toleration{{ + Key: "valid", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoSchedule, + }, { + Key: "not a valid key!!!", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoSchedule, + }}, + }, + expectErrs: 1, + }, { + name: "duplicate tolerations", + scheduling: &node.Scheduling{ + Tolerations: []core.Toleration{{ + Key: "valid", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoExecute, + TolerationSeconds: utilpointer.Int64Ptr(5), + }, { + Key: 
"valid", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoExecute, + TolerationSeconds: utilpointer.Int64Ptr(10), + }}, + }, + expectErrs: 1, + }, { + name: "invalid scheduling", + scheduling: &node.Scheduling{ + NodeSelector: map[string]string{"not a valid key!!!": "nope"}, + Tolerations: []core.Toleration{{ + Key: "valid", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoSchedule, + }, { + Key: "not a valid toleration key!!!", + Operator: core.TolerationOpExists, + Effect: core.TaintEffectNoSchedule, + }}, + }, + expectErrs: 2, + }} + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + rc := &node.RuntimeClass{ + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Handler: "bar", + Scheduling: test.scheduling, + } + assert.Len(t, ValidateRuntimeClass(rc), test.expectErrs) + }) + } +} diff --git a/staging/src/k8s.io/api/node/v1alpha1/types.go b/staging/src/k8s.io/api/node/v1alpha1/types.go index 6466a83678f..035aac4aa02 100644 --- a/staging/src/k8s.io/api/node/v1alpha1/types.go +++ b/staging/src/k8s.io/api/node/v1alpha1/types.go @@ -66,6 +66,13 @@ type RuntimeClassSpec struct { // This field is alpha-level as of Kubernetes v1.15, and is only honored by servers that enable the PodOverhead feature. // +optional Overhead *Overhead `json:"overhead,omitempty" protobuf:"bytes,2,opt,name=overhead"` + + // Scheduling holds the scheduling constraints to ensure that pods running + // with this RuntimeClass are scheduled to nodes that support it. + // If scheduling is nil, this RuntimeClass is assumed to be supported by all + // nodes. + // +optional + Scheduling *Scheduling `json:"scheduling,omitempty" protobuf:"bytes,3,opt,name=scheduling"` } // Overhead structure represents the resource overhead associated with running a pod. 
@@ -75,6 +82,25 @@ type Overhead struct { PodFixed corev1.ResourceList `json:"podFixed,omitempty" protobuf:"bytes,1,opt,name=podFixed,casttype=k8s.io/api/core/v1.ResourceList,castkey=k8s.io/api/core/v1.ResourceName,castvalue=k8s.io/apimachinery/pkg/api/resource.Quantity"` } +// Scheduling specifies the scheduling constraints for nodes supporting a +// RuntimeClass. +type Scheduling struct { + // nodeSelector lists labels that must be present on nodes that support this + // RuntimeClass. Pods using this RuntimeClass can only be scheduled to a + // node matched by this selector. The RuntimeClass nodeSelector is merged + // with a pod's existing nodeSelector. Any conflicts will cause the pod to + // be rejected in admission. + // +optional + NodeSelector map[string]string `json:"nodeSelector,omitempty" protobuf:"bytes,1,opt,name=nodeSelector"` + + // tolerations are appended (excluding duplicates) to pods running with this + // RuntimeClass during admission, effectively unioning the set of nodes + // tolerated by the pod and the RuntimeClass. + // +optional + // +listType=atomic + Tolerations []corev1.Toleration `json:"tolerations,omitempty" protobuf:"bytes,2,rep,name=tolerations"` +} + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // RuntimeClassList is a list of RuntimeClass objects. diff --git a/staging/src/k8s.io/api/node/v1beta1/types.go b/staging/src/k8s.io/api/node/v1beta1/types.go index f389322d7be..fbf9461b1c0 100644 --- a/staging/src/k8s.io/api/node/v1beta1/types.go +++ b/staging/src/k8s.io/api/node/v1beta1/types.go @@ -56,6 +56,13 @@ type RuntimeClass struct { // This field is alpha-level as of Kubernetes v1.15, and is only honored by servers that enable the PodOverhead feature. // +optional Overhead *Overhead `json:"overhead,omitempty" protobuf:"bytes,3,opt,name=overhead"` + + // Scheduling holds the scheduling constraints to ensure that pods running + // with this RuntimeClass are scheduled to nodes that support it. 
+ // If scheduling is nil, this RuntimeClass is assumed to be supported by all + // nodes. + // +optional + Scheduling *Scheduling `json:"scheduling,omitempty" protobuf:"bytes,4,opt,name=scheduling"` } // Overhead structure represents the resource overhead associated with running a pod. @@ -65,6 +72,25 @@ type Overhead struct { PodFixed corev1.ResourceList `json:"podFixed,omitempty" protobuf:"bytes,1,opt,name=podFixed,casttype=k8s.io/api/core/v1.ResourceList,castkey=k8s.io/api/core/v1.ResourceName,castvalue=k8s.io/apimachinery/pkg/api/resource.Quantity"` } +// Scheduling specifies the scheduling constraints for nodes supporting a +// RuntimeClass. +type Scheduling struct { + // nodeSelector lists labels that must be present on nodes that support this + // RuntimeClass. Pods using this RuntimeClass can only be scheduled to a + // node matched by this selector. The RuntimeClass nodeSelector is merged + // with a pod's existing nodeSelector. Any conflicts will cause the pod to + // be rejected in admission. + // +optional + NodeSelector map[string]string `json:"nodeSelector,omitempty" protobuf:"bytes,1,opt,name=nodeSelector"` + + // tolerations are appended (excluding duplicates) to pods running with this + // RuntimeClass during admission, effectively unioning the set of nodes + // tolerated by the pod and the RuntimeClass. + // +optional + // +listType=atomic + Tolerations []corev1.Toleration `json:"tolerations,omitempty" protobuf:"bytes,2,rep,name=tolerations"` +} + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // RuntimeClassList is a list of RuntimeClass objects.