Allow system critical priority classes in API validation

This commit is contained in:
Bobby (Babak) Salamat 2018-03-01 00:47:48 -08:00
parent 515ba9e8d4
commit 9592a9ecf4
12 changed files with 193 additions and 78 deletions

View File

@ -324,7 +324,6 @@ pkg/registry/storage/rest
pkg/registry/storage/storageclass pkg/registry/storage/storageclass
pkg/registry/storage/storageclass/storage pkg/registry/storage/storageclass/storage
pkg/routes pkg/routes
pkg/scheduler/api
pkg/security/apparmor pkg/security/apparmor
pkg/security/podsecuritypolicy pkg/security/podsecuritypolicy
pkg/security/podsecuritypolicy/group pkg/security/podsecuritypolicy/group

View File

@ -0,0 +1,65 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// SystemPriorityClasses define system priority classes that are auto-created at cluster bootstrapping.
// Our API validation logic ensures that any priority class that has a system prefix or its value
// is higher than HighestUserDefinablePriority is equal to one of these SystemPriorityClasses.
var systemPriorityClasses = []*PriorityClass{
{
ObjectMeta: metav1.ObjectMeta{
Name: SystemNodeCritical,
},
Value: SystemCriticalPriority + 1000,
Description: "Used for system critical pods that must not be moved from their current node.",
},
{
ObjectMeta: metav1.ObjectMeta{
Name: SystemClusterCritical,
},
Value: SystemCriticalPriority,
Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.",
},
}
// SystemPriorityClasses returns the list of system priority classes.
// NOTE: be careful not to modify any of elements of the returned array directly.
func SystemPriorityClasses() []*PriorityClass {
return systemPriorityClasses
}
// IsKnownSystemPriorityClass checks that "pc" is equal to one of the system PriorityClasses.
// It ignores "description", labels, annotations, etc. of the PriorityClass.
func IsKnownSystemPriorityClass(pc *PriorityClass) (bool, error) {
for _, spc := range systemPriorityClasses {
if spc.Name == pc.Name {
if spc.Value != pc.Value {
return false, fmt.Errorf("value of %v PriorityClass must be %v", spc.Name, spc.Value)
}
if spc.GlobalDefault != pc.GlobalDefault {
return false, fmt.Errorf("globalDefault of %v PriorityClass must be %v", spc.Name, spc.GlobalDefault)
}
return true, nil
}
}
return false, fmt.Errorf("%v is not a known system priority class", pc.Name)
}

View File

@ -0,0 +1,54 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"testing"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func TestIsKnownSystemPriorityClass(t *testing.T) {
tests := []struct {
name string
pc *PriorityClass
expected bool
}{
{
name: "system priority class",
pc: SystemPriorityClasses()[0],
expected: true,
},
{
name: "non-system priority class",
pc: &PriorityClass{
ObjectMeta: metav1.ObjectMeta{
Name: SystemNodeCritical,
},
Value: SystemCriticalPriority, // This is the value of system cluster critical
Description: "Used for system critical pods that must not be moved from their current node.",
},
expected: false,
},
}
for _, test := range tests {
if is, err := IsKnownSystemPriorityClass(test.pc); test.expected != is {
t.Errorf("Test [%v]: Expected %v, but got %v. Error: %v", test.name, test.expected, is, err)
}
}
}

View File

@ -17,22 +17,29 @@ limitations under the License.
package validation package validation
import ( import (
"fmt"
"strings"
"k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/validation/field"
apivalidation "k8s.io/kubernetes/pkg/apis/core/validation" apivalidation "k8s.io/kubernetes/pkg/apis/core/validation"
"k8s.io/kubernetes/pkg/apis/scheduling" "k8s.io/kubernetes/pkg/apis/scheduling"
) )
// ValidatePriorityClassName checks whether the given priority class name is valid.
func ValidatePriorityClassName(name string, prefix bool) []string {
return apivalidation.NameIsDNSSubdomain(name, prefix)
}
// ValidatePriorityClass tests whether required fields in the PriorityClass are // ValidatePriorityClass tests whether required fields in the PriorityClass are
// set correctly. // set correctly.
func ValidatePriorityClass(pc *scheduling.PriorityClass) field.ErrorList { func ValidatePriorityClass(pc *scheduling.PriorityClass) field.ErrorList {
allErrs := field.ErrorList{} allErrs := field.ErrorList{}
allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, ValidatePriorityClassName, field.NewPath("metadata"))...) allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, apivalidation.NameIsDNSSubdomain, field.NewPath("metadata"))...)
// The "Value" field can be any valid integer. So, no need to validate. // If the priorityClass starts with a system prefix, it must be one of the
// predefined system priority classes.
if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) {
if is, err := scheduling.IsKnownSystemPriorityClass(pc); !is {
allErrs = append(allErrs, field.Forbidden(field.NewPath("metadata", "name"), "priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only. error: "+err.Error()))
}
} else if pc.Value > scheduling.HighestUserDefinablePriority {
// Non-system critical priority classes are not allowed to have a value larger than HighestUserDefinablePriority.
allErrs = append(allErrs, field.Forbidden(field.NewPath("value"), fmt.Sprintf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority)))
}
return allErrs return allErrs
} }

View File

@ -25,6 +25,7 @@ import (
) )
func TestValidatePriorityClass(t *testing.T) { func TestValidatePriorityClass(t *testing.T) {
spcs := scheduling.SystemPriorityClasses()
successCases := map[string]scheduling.PriorityClass{ successCases := map[string]scheduling.PriorityClass{
"no description": { "no description": {
ObjectMeta: metav1.ObjectMeta{Name: "tier1", Namespace: ""}, ObjectMeta: metav1.ObjectMeta{Name: "tier1", Namespace: ""},
@ -36,6 +37,12 @@ func TestValidatePriorityClass(t *testing.T) {
GlobalDefault: false, GlobalDefault: false,
Description: "Used for the highest priority pods.", Description: "Used for the highest priority pods.",
}, },
"system node critical": {
ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""},
Value: spcs[0].Value,
GlobalDefault: spcs[0].GlobalDefault,
Description: "system priority class 0",
},
} }
for k, v := range successCases { for k, v := range successCases {
@ -53,6 +60,16 @@ func TestValidatePriorityClass(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{Name: "tier&1", Namespace: ""}, ObjectMeta: metav1.ObjectMeta{Name: "tier&1", Namespace: ""},
Value: 100, Value: 100,
}, },
"incorrect system class name": {
ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""},
Value: 0,
GlobalDefault: spcs[0].GlobalDefault,
},
"incorrect system class value": {
ObjectMeta: metav1.ObjectMeta{Name: "system-something", Namespace: ""},
Value: spcs[0].Value,
GlobalDefault: spcs[0].GlobalDefault,
},
} }
for k, v := range errorCases { for k, v := range errorCases {

View File

@ -17,11 +17,16 @@ limitations under the License.
package storage package storage
import ( import (
"errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/registry/generic" "k8s.io/apiserver/pkg/registry/generic"
genericregistry "k8s.io/apiserver/pkg/registry/generic/registry" genericregistry "k8s.io/apiserver/pkg/registry/generic/registry"
"k8s.io/apiserver/pkg/registry/rest" "k8s.io/apiserver/pkg/registry/rest"
schedulingapi "k8s.io/kubernetes/pkg/apis/scheduling" "k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/pkg/registry/scheduling/priorityclass" "k8s.io/kubernetes/pkg/registry/scheduling/priorityclass"
) )
@ -33,9 +38,9 @@ type REST struct {
// NewREST returns a RESTStorage object that will work against priority classes. // NewREST returns a RESTStorage object that will work against priority classes.
func NewREST(optsGetter generic.RESTOptionsGetter) *REST { func NewREST(optsGetter generic.RESTOptionsGetter) *REST {
store := &genericregistry.Store{ store := &genericregistry.Store{
NewFunc: func() runtime.Object { return &schedulingapi.PriorityClass{} }, NewFunc: func() runtime.Object { return &scheduling.PriorityClass{} },
NewListFunc: func() runtime.Object { return &schedulingapi.PriorityClassList{} }, NewListFunc: func() runtime.Object { return &scheduling.PriorityClassList{} },
DefaultQualifiedResource: schedulingapi.Resource("priorityclasses"), DefaultQualifiedResource: scheduling.Resource("priorityclasses"),
CreateStrategy: priorityclass.Strategy, CreateStrategy: priorityclass.Strategy,
UpdateStrategy: priorityclass.Strategy, UpdateStrategy: priorityclass.Strategy,
@ -56,3 +61,14 @@ var _ rest.ShortNamesProvider = &REST{}
func (r *REST) ShortNames() []string { func (r *REST) ShortNames() []string {
return []string{"pc"} return []string{"pc"}
} }
// Delete ensures that system priority classes are not deleted.
func (r *REST) Delete(ctx genericapirequest.Context, name string, options *metav1.DeleteOptions) (runtime.Object, bool, error) {
for _, spc := range scheduling.SystemPriorityClasses() {
if name == spc.Name {
return nil, false, apierrors.NewForbidden(scheduling.Resource("priorityclasses"), spc.Name, errors.New("this is a system priority class and cannot be deleted"))
}
}
return r.Store.Delete(ctx, name, options)
}

View File

@ -23,6 +23,7 @@ import (
"k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/registry/generic" "k8s.io/apiserver/pkg/registry/generic"
genericregistrytest "k8s.io/apiserver/pkg/registry/generic/testing" genericregistrytest "k8s.io/apiserver/pkg/registry/generic/testing"
etcdtesting "k8s.io/apiserver/pkg/storage/etcd/testing" etcdtesting "k8s.io/apiserver/pkg/storage/etcd/testing"
@ -105,6 +106,22 @@ func TestDelete(t *testing.T) {
test.TestDelete(validNewPriorityClass()) test.TestDelete(validNewPriorityClass())
} }
// TestDeleteSystemPriorityClass checks that system priority classes cannot be deleted.
func TestDeleteSystemPriorityClass(t *testing.T) {
storage, server := newStorage(t)
defer server.Terminate(t)
defer storage.Store.DestroyFunc()
key := "test/system-node-critical"
ctx := genericapirequest.NewContext()
pc := scheduling.SystemPriorityClasses()[0]
if err := storage.Store.Storage.Create(ctx, key, pc, nil, 0); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if _, _, err := storage.Delete(ctx, pc.Name, nil); err == nil {
t.Error("expected to receive an error")
}
}
func TestGet(t *testing.T) { func TestGet(t *testing.T) {
storage, server := newStorage(t) storage, server := newStorage(t)
defer server.Terminate(t) defer server.Terminate(t)

View File

@ -68,9 +68,7 @@ func (priorityClassStrategy) AllowCreateOnUpdate() bool {
// ValidateUpdate is the default update validation for an end user. // ValidateUpdate is the default update validation for an end user.
func (priorityClassStrategy) ValidateUpdate(ctx genericapirequest.Context, obj, old runtime.Object) field.ErrorList { func (priorityClassStrategy) ValidateUpdate(ctx genericapirequest.Context, obj, old runtime.Object) field.ErrorList {
validationErrorList := validation.ValidatePriorityClass(obj.(*scheduling.PriorityClass)) return validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass))
updateErrorList := validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass))
return append(validationErrorList, updateErrorList...)
} }
// AllowUnconditionalUpdate is the default update policy for PriorityClass objects. // AllowUnconditionalUpdate is the default update policy for PriorityClass objects.

View File

@ -68,22 +68,6 @@ func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStart
} }
func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc { func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
priorityClasses := []*scheduling.PriorityClass{
{
ObjectMeta: metav1.ObjectMeta{
Name: scheduling.SystemNodeCritical,
},
Value: scheduling.SystemCriticalPriority + 1000,
Description: "Used for system critical pods that must not be moved from their current node.",
},
{
ObjectMeta: metav1.ObjectMeta{
Name: scheduling.SystemClusterCritical,
},
Value: scheduling.SystemCriticalPriority,
Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.",
},
}
return func(hookContext genericapiserver.PostStartHookContext) error { return func(hookContext genericapiserver.PostStartHookContext) error {
// Adding system priority classes is important. If they fail to add, many critical system // Adding system priority classes is important. If they fail to add, many critical system
// components may fail and cluster may break. // components may fail and cluster may break.
@ -94,7 +78,7 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
return false, nil return false, nil
} }
for _, pc := range priorityClasses { for _, pc := range scheduling.SystemPriorityClasses() {
_, err := schedClientSet.PriorityClasses().Get(pc.Name, metav1.GetOptions{}) _, err := schedClientSet.PriorityClasses().Get(pc.Name, metav1.GetOptions{})
if err != nil { if err != nil {
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
@ -106,11 +90,12 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
} }
} else { } else {
// Unable to get the priority class for reasons other than "not found". // Unable to get the priority class for reasons other than "not found".
glog.Warningf("unable to get PriorityClass %v: %v. Retrying...", pc.Name, err)
return false, err return false, err
} }
} }
} }
glog.Infof("all system priority classes are created successfully.") glog.Infof("all system priority classes are created successfully or already exist.")
return true, nil return true, nil
}) })
// if we're never able to make it through initialization, kill the API server. // if we're never able to make it through initialization, kill the API server.

View File

@ -19,12 +19,10 @@ package priority
import ( import (
"fmt" "fmt"
"io" "io"
"strings"
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apiserver/pkg/admission" "k8s.io/apiserver/pkg/admission"
"k8s.io/apiserver/pkg/authentication/user"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
api "k8s.io/kubernetes/pkg/apis/core" api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/apis/scheduling" "k8s.io/kubernetes/pkg/apis/scheduling"
@ -188,16 +186,6 @@ func (p *priorityPlugin) validatePriorityClass(a admission.Attributes) error {
if !ok { if !ok {
return errors.NewBadRequest("resource was marked with kind PriorityClass but was unable to be converted") return errors.NewBadRequest("resource was marked with kind PriorityClass but was unable to be converted")
} }
// API server adds system critical priority classes at bootstrapping. We should
// not enforce restrictions on adding system level priority classes for API server.
if userInfo := a.GetUserInfo(); userInfo == nil || userInfo.GetName() != user.APIServerUser {
if pc.Value > scheduling.HighestUserDefinablePriority {
return admission.NewForbidden(a, fmt.Errorf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority))
}
if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) {
return admission.NewForbidden(a, fmt.Errorf("priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only"))
}
}
// If the new PriorityClass tries to be the default priority, make sure that no other priority class is marked as default. // If the new PriorityClass tries to be the default priority, make sure that no other priority class is marked as default.
if pc.GlobalDefault { if pc.GlobalDefault {
dpc, err := p.getDefaultPriorityClass() dpc, err := p.getDefaultPriorityClass()

View File

@ -87,17 +87,6 @@ var systemClusterCritical = &scheduling.PriorityClass{
} }
func TestPriorityClassAdmission(t *testing.T) { func TestPriorityClassAdmission(t *testing.T) {
var tooHighPriorityClass = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass",
},
ObjectMeta: metav1.ObjectMeta{
Name: "toohighclass",
},
Value: scheduling.HighestUserDefinablePriority + 1,
Description: "Just a test priority class",
}
var systemClass = &scheduling.PriorityClass{ var systemClass = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{ TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass", Kind: "PriorityClass",
@ -131,21 +120,7 @@ func TestPriorityClassAdmission(t *testing.T) {
true, true,
}, },
{ {
"too high PriorityClass value", "system name and value are allowed by admission controller",
[]*scheduling.PriorityClass{},
tooHighPriorityClass,
nil,
true,
},
{
"system name conflict",
[]*scheduling.PriorityClass{},
systemClass,
nil,
true,
},
{
"system name allowed for API server",
[]*scheduling.PriorityClass{}, []*scheduling.PriorityClass{},
systemClass, systemClass,
&user.DefaultInfo{ &user.DefaultInfo{

View File

@ -325,11 +325,8 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu
framework.ExpectNoError(err) framework.ExpectNoError(err)
}) })
// This test verifies that when a higher priority pod is created and no node with // This test verifies that system critical priorities are created automatically and resolved properly.
// enough resources is found, scheduler preempts a lower priority pod to schedule
// the high priority pod.
It("validates critical system priorities are created and resolved", func() { It("validates critical system priorities are created and resolved", func() {
var podRes v1.ResourceList
// Create pods that use system critical priorities and // Create pods that use system critical priorities and
By("Create pods that use critical system priorities.") By("Create pods that use critical system priorities.")
systemPriorityClasses := []string{ systemPriorityClasses := []string{
@ -339,9 +336,6 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu
pod := createPausePod(f, pausePodConfig{ pod := createPausePod(f, pausePodConfig{
Name: fmt.Sprintf("pod%d-%v", i, spc), Name: fmt.Sprintf("pod%d-%v", i, spc),
PriorityClassName: spc, PriorityClassName: spc,
Resources: &v1.ResourceRequirements{
Requests: podRes,
},
}) })
Expect(pod.Spec.Priority).NotTo(BeNil()) Expect(pod.Spec.Priority).NotTo(BeNil())
framework.Logf("Created pod: %v", pod.Name) framework.Logf("Created pod: %v", pod.Name)