diff --git a/hack/.golint_failures b/hack/.golint_failures index 95a0b7fb99b..45fb6e1aad7 100644 --- a/hack/.golint_failures +++ b/hack/.golint_failures @@ -324,7 +324,6 @@ pkg/registry/storage/rest pkg/registry/storage/storageclass pkg/registry/storage/storageclass/storage pkg/routes -pkg/scheduler/api pkg/security/apparmor pkg/security/podsecuritypolicy pkg/security/podsecuritypolicy/group diff --git a/pkg/apis/scheduling/helpers.go b/pkg/apis/scheduling/helpers.go new file mode 100644 index 00000000000..58b3799751c --- /dev/null +++ b/pkg/apis/scheduling/helpers.go @@ -0,0 +1,65 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduling + +import ( + "fmt" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// SystemPriorityClasses define system priority classes that are auto-created at cluster bootstrapping. +// Our API validation logic ensures that any priority class that has a system prefix or its value +// is higher than HighestUserDefinablePriority is equal to one of these SystemPriorityClasses. +var systemPriorityClasses = []*PriorityClass{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: SystemNodeCritical, + }, + Value: SystemCriticalPriority + 1000, + Description: "Used for system critical pods that must not be moved from their current node.", + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: SystemClusterCritical, + }, + Value: SystemCriticalPriority, + Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.", + }, +} + +// SystemPriorityClasses returns the list of system priority classes. +// NOTE: be careful not to modify any of elements of the returned array directly. +func SystemPriorityClasses() []*PriorityClass { + return systemPriorityClasses +} + +// IsKnownSystemPriorityClass checks that "pc" is equal to one of the system PriorityClasses. +// It ignores "description", labels, annotations, etc. of the PriorityClass. +func IsKnownSystemPriorityClass(pc *PriorityClass) (bool, error) { + for _, spc := range systemPriorityClasses { + if spc.Name == pc.Name { + if spc.Value != pc.Value { + return false, fmt.Errorf("value of %v PriorityClass must be %v", spc.Name, spc.Value) + } + if spc.GlobalDefault != pc.GlobalDefault { + return false, fmt.Errorf("globalDefault of %v PriorityClass must be %v", spc.Name, spc.GlobalDefault) + } + return true, nil + } + } + return false, fmt.Errorf("%v is not a known system priority class", pc.Name) +} diff --git a/pkg/apis/scheduling/helpers_test.go b/pkg/apis/scheduling/helpers_test.go new file mode 100644 index 00000000000..b0fe132b838 --- /dev/null +++ b/pkg/apis/scheduling/helpers_test.go @@ -0,0 +1,54 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduling + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestIsKnownSystemPriorityClass(t *testing.T) { + tests := []struct { + name string + pc *PriorityClass + expected bool + }{ + { + name: "system priority class", + pc: SystemPriorityClasses()[0], + expected: true, + }, + { + name: "non-system priority class", + pc: &PriorityClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: SystemNodeCritical, + }, + Value: SystemCriticalPriority, // This is the value of system cluster critical + Description: "Used for system critical pods that must not be moved from their current node.", + }, + expected: false, + }, + } + + for _, test := range tests { + if is, err := IsKnownSystemPriorityClass(test.pc); test.expected != is { + t.Errorf("Test [%v]: Expected %v, but got %v. Error: %v", test.name, test.expected, is, err) + } + } +} diff --git a/pkg/apis/scheduling/validation/validation.go b/pkg/apis/scheduling/validation/validation.go index bd446b25585..5f2998157ed 100644 --- a/pkg/apis/scheduling/validation/validation.go +++ b/pkg/apis/scheduling/validation/validation.go @@ -17,22 +17,29 @@ limitations under the License. package validation import ( + "fmt" + "strings" + "k8s.io/apimachinery/pkg/util/validation/field" apivalidation "k8s.io/kubernetes/pkg/apis/core/validation" "k8s.io/kubernetes/pkg/apis/scheduling" ) -// ValidatePriorityClassName checks whether the given priority class name is valid. -func ValidatePriorityClassName(name string, prefix bool) []string { - return apivalidation.NameIsDNSSubdomain(name, prefix) -} - // ValidatePriorityClass tests whether required fields in the PriorityClass are // set correctly. func ValidatePriorityClass(pc *scheduling.PriorityClass) field.ErrorList { allErrs := field.ErrorList{} - allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, ValidatePriorityClassName, field.NewPath("metadata"))...) - // The "Value" field can be any valid integer. So, no need to validate. + allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, apivalidation.NameIsDNSSubdomain, field.NewPath("metadata"))...) + // If the priorityClass starts with a system prefix, it must be one of the + // predefined system priority classes. + if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) { + if is, err := scheduling.IsKnownSystemPriorityClass(pc); !is { + allErrs = append(allErrs, field.Forbidden(field.NewPath("metadata", "name"), "priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only. error: "+err.Error())) + } + } else if pc.Value > scheduling.HighestUserDefinablePriority { + // Non-system critical priority classes are not allowed to have a value larger than HighestUserDefinablePriority. + allErrs = append(allErrs, field.Forbidden(field.NewPath("value"), fmt.Sprintf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority))) + } return allErrs } diff --git a/pkg/apis/scheduling/validation/validation_test.go b/pkg/apis/scheduling/validation/validation_test.go index 8b0cec40b6c..3d7373e207e 100644 --- a/pkg/apis/scheduling/validation/validation_test.go +++ b/pkg/apis/scheduling/validation/validation_test.go @@ -25,6 +25,7 @@ import ( ) func TestValidatePriorityClass(t *testing.T) { + spcs := scheduling.SystemPriorityClasses() successCases := map[string]scheduling.PriorityClass{ "no description": { ObjectMeta: metav1.ObjectMeta{Name: "tier1", Namespace: ""}, @@ -36,6 +37,12 @@ func TestValidatePriorityClass(t *testing.T) { GlobalDefault: false, Description: "Used for the highest priority pods.", }, + "system node critical": { + ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""}, + Value: spcs[0].Value, + GlobalDefault: spcs[0].GlobalDefault, + Description: "system priority class 0", + }, } for k, v := range successCases { @@ -53,6 +60,16 @@ func TestValidatePriorityClass(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "tier&1", Namespace: ""}, Value: 100, }, + "incorrect system class name": { + ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""}, + Value: 0, + GlobalDefault: spcs[0].GlobalDefault, + }, + "incorrect system class value": { + ObjectMeta: metav1.ObjectMeta{Name: "system-something", Namespace: ""}, + Value: spcs[0].Value, + GlobalDefault: spcs[0].GlobalDefault, + }, } for k, v := range errorCases { diff --git a/pkg/registry/scheduling/priorityclass/storage/storage.go b/pkg/registry/scheduling/priorityclass/storage/storage.go index b4203621199..da78b01e1eb 100644 --- a/pkg/registry/scheduling/priorityclass/storage/storage.go +++ b/pkg/registry/scheduling/priorityclass/storage/storage.go @@ -17,11 +17,16 @@ limitations under the License. package storage import ( + "errors" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + genericapirequest "k8s.io/apiserver/pkg/endpoints/request" "k8s.io/apiserver/pkg/registry/generic" genericregistry "k8s.io/apiserver/pkg/registry/generic/registry" "k8s.io/apiserver/pkg/registry/rest" - schedulingapi "k8s.io/kubernetes/pkg/apis/scheduling" + "k8s.io/kubernetes/pkg/apis/scheduling" "k8s.io/kubernetes/pkg/registry/scheduling/priorityclass" ) @@ -33,9 +38,9 @@ type REST struct { // NewREST returns a RESTStorage object that will work against priority classes. func NewREST(optsGetter generic.RESTOptionsGetter) *REST { store := &genericregistry.Store{ - NewFunc: func() runtime.Object { return &schedulingapi.PriorityClass{} }, - NewListFunc: func() runtime.Object { return &schedulingapi.PriorityClassList{} }, - DefaultQualifiedResource: schedulingapi.Resource("priorityclasses"), + NewFunc: func() runtime.Object { return &scheduling.PriorityClass{} }, + NewListFunc: func() runtime.Object { return &scheduling.PriorityClassList{} }, + DefaultQualifiedResource: scheduling.Resource("priorityclasses"), CreateStrategy: priorityclass.Strategy, UpdateStrategy: priorityclass.Strategy, @@ -56,3 +61,14 @@ var _ rest.ShortNamesProvider = &REST{} func (r *REST) ShortNames() []string { return []string{"pc"} } + +// Delete ensures that system priority classes are not deleted. +func (r *REST) Delete(ctx genericapirequest.Context, name string, options *metav1.DeleteOptions) (runtime.Object, bool, error) { + for _, spc := range scheduling.SystemPriorityClasses() { + if name == spc.Name { + return nil, false, apierrors.NewForbidden(scheduling.Resource("priorityclasses"), spc.Name, errors.New("this is a system priority class and cannot be deleted")) + } + } + + return r.Store.Delete(ctx, name, options) +} diff --git a/pkg/registry/scheduling/priorityclass/storage/storage_test.go b/pkg/registry/scheduling/priorityclass/storage/storage_test.go index 8ef95fd59f5..01a081ea5d9 100644 --- a/pkg/registry/scheduling/priorityclass/storage/storage_test.go +++ b/pkg/registry/scheduling/priorityclass/storage/storage_test.go @@ -23,6 +23,7 @@ import ( "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + genericapirequest "k8s.io/apiserver/pkg/endpoints/request" "k8s.io/apiserver/pkg/registry/generic" genericregistrytest "k8s.io/apiserver/pkg/registry/generic/testing" etcdtesting "k8s.io/apiserver/pkg/storage/etcd/testing" @@ -105,6 +106,22 @@ func TestDelete(t *testing.T) { test.TestDelete(validNewPriorityClass()) } +// TestDeleteSystemPriorityClass checks that system priority classes cannot be deleted. +func TestDeleteSystemPriorityClass(t *testing.T) { + storage, server := newStorage(t) + defer server.Terminate(t) + defer storage.Store.DestroyFunc() + key := "test/system-node-critical" + ctx := genericapirequest.NewContext() + pc := scheduling.SystemPriorityClasses()[0] + if err := storage.Store.Storage.Create(ctx, key, pc, nil, 0); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, _, err := storage.Delete(ctx, pc.Name, nil); err == nil { + t.Error("expected to receive an error") + } +} + func TestGet(t *testing.T) { storage, server := newStorage(t) defer server.Terminate(t) diff --git a/pkg/registry/scheduling/priorityclass/strategy.go b/pkg/registry/scheduling/priorityclass/strategy.go index f39dce1df1a..45fb3eacb0b 100644 --- a/pkg/registry/scheduling/priorityclass/strategy.go +++ b/pkg/registry/scheduling/priorityclass/strategy.go @@ -68,9 +68,7 @@ func (priorityClassStrategy) AllowCreateOnUpdate() bool { // ValidateUpdate is the default update validation for an end user. func (priorityClassStrategy) ValidateUpdate(ctx genericapirequest.Context, obj, old runtime.Object) field.ErrorList { - validationErrorList := validation.ValidatePriorityClass(obj.(*scheduling.PriorityClass)) - updateErrorList := validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass)) - return append(validationErrorList, updateErrorList...) + return validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass)) } // AllowUnconditionalUpdate is the default update policy for PriorityClass objects. diff --git a/pkg/registry/scheduling/rest/storage_scheduling.go b/pkg/registry/scheduling/rest/storage_scheduling.go index 807a4ae8a6c..f9f40249689 100644 --- a/pkg/registry/scheduling/rest/storage_scheduling.go +++ b/pkg/registry/scheduling/rest/storage_scheduling.go @@ -68,22 +68,6 @@ func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStart } func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc { - priorityClasses := []*scheduling.PriorityClass{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: scheduling.SystemNodeCritical, - }, - Value: scheduling.SystemCriticalPriority + 1000, - Description: "Used for system critical pods that must not be moved from their current node.", - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: scheduling.SystemClusterCritical, - }, - Value: scheduling.SystemCriticalPriority, - Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.", - }, - } return func(hookContext genericapiserver.PostStartHookContext) error { // Adding system priority classes is important. If they fail to add, many critical system // components may fail and cluster may break. @@ -94,7 +78,7 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc { return false, nil } - for _, pc := range priorityClasses { + for _, pc := range scheduling.SystemPriorityClasses() { _, err := schedClientSet.PriorityClasses().Get(pc.Name, metav1.GetOptions{}) if err != nil { if apierrors.IsNotFound(err) { @@ -106,11 +90,12 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc { } } else { // Unable to get the priority class for reasons other than "not found". + glog.Warningf("unable to get PriorityClass %v: %v. Retrying...", pc.Name, err) return false, err } } } - glog.Infof("all system priority classes are created successfully.") + glog.Infof("all system priority classes are created successfully or already exist.") return true, nil }) // if we're never able to make it through initialization, kill the API server. diff --git a/plugin/pkg/admission/priority/admission.go b/plugin/pkg/admission/priority/admission.go index 97add55a8d2..8f3d62469b1 100644 --- a/plugin/pkg/admission/priority/admission.go +++ b/plugin/pkg/admission/priority/admission.go @@ -19,12 +19,10 @@ package priority import ( "fmt" "io" - "strings" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/labels" "k8s.io/apiserver/pkg/admission" - "k8s.io/apiserver/pkg/authentication/user" utilfeature "k8s.io/apiserver/pkg/util/feature" api "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/scheduling" @@ -188,16 +186,6 @@ func (p *priorityPlugin) validatePriorityClass(a admission.Attributes) error { if !ok { return errors.NewBadRequest("resource was marked with kind PriorityClass but was unable to be converted") } - // API server adds system critical priority classes at bootstrapping. We should - // not enforce restrictions on adding system level priority classes for API server. - if userInfo := a.GetUserInfo(); userInfo == nil || userInfo.GetName() != user.APIServerUser { - if pc.Value > scheduling.HighestUserDefinablePriority { - return admission.NewForbidden(a, fmt.Errorf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority)) - } - if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) { - return admission.NewForbidden(a, fmt.Errorf("priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only")) - } - } // If the new PriorityClass tries to be the default priority, make sure that no other priority class is marked as default. if pc.GlobalDefault { dpc, err := p.getDefaultPriorityClass() diff --git a/plugin/pkg/admission/priority/admission_test.go b/plugin/pkg/admission/priority/admission_test.go index bda6a8a4eea..2a9e3a1b33c 100644 --- a/plugin/pkg/admission/priority/admission_test.go +++ b/plugin/pkg/admission/priority/admission_test.go @@ -87,17 +87,6 @@ var systemClusterCritical = &scheduling.PriorityClass{ } func TestPriorityClassAdmission(t *testing.T) { - var tooHighPriorityClass = &scheduling.PriorityClass{ - TypeMeta: metav1.TypeMeta{ - Kind: "PriorityClass", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "toohighclass", - }, - Value: scheduling.HighestUserDefinablePriority + 1, - Description: "Just a test priority class", - } - var systemClass = &scheduling.PriorityClass{ TypeMeta: metav1.TypeMeta{ Kind: "PriorityClass", @@ -131,21 +120,7 @@ func TestPriorityClassAdmission(t *testing.T) { true, }, { - "too high PriorityClass value", - []*scheduling.PriorityClass{}, - tooHighPriorityClass, - nil, - true, - }, - { - "system name conflict", - []*scheduling.PriorityClass{}, - systemClass, - nil, - true, - }, - { - "system name allowed for API server", + "system name and value are allowed by admission controller", []*scheduling.PriorityClass{}, systemClass, &user.DefaultInfo{ diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go index 64a29dc61f8..b67c6bf8130 100644 --- a/test/e2e/scheduling/preemption.go +++ b/test/e2e/scheduling/preemption.go @@ -325,11 +325,8 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu framework.ExpectNoError(err) }) - // This test verifies that when a higher priority pod is created and no node with - // enough resources is found, scheduler preempts a lower priority pod to schedule - // the high priority pod. + // This test verifies that system critical priorities are created automatically and resolved properly. It("validates critical system priorities are created and resolved", func() { - var podRes v1.ResourceList // Create pods that use system critical priorities and By("Create pods that use critical system priorities.") systemPriorityClasses := []string{ @@ -339,9 +336,6 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu pod := createPausePod(f, pausePodConfig{ Name: fmt.Sprintf("pod%d-%v", i, spc), PriorityClassName: spc, - Resources: &v1.ResourceRequirements{ - Requests: podRes, - }, }) Expect(pod.Spec.Priority).NotTo(BeNil()) framework.Logf("Created pod: %v", pod.Name)