Remove scheduler framework dependency on predicates package

Abdullah Gharaibeh
2020-01-13 13:02:19 -05:00
parent e265afa2cd
commit fb66e807cd
32 changed files with 895 additions and 2335 deletions

View File

@@ -2,22 +2,17 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"error.go",
"predicates.go",
],
srcs = ["predicates.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/framework/plugins/helper:go_default_library",
"//pkg/scheduler/framework/plugins/nodeaffinity:go_default_library",
"//pkg/scheduler/framework/plugins/nodename:go_default_library",
"//pkg/scheduler/framework/plugins/nodeports:go_default_library",
"//pkg/scheduler/framework/plugins/noderesources:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
@@ -26,16 +21,14 @@ go_test(
srcs = ["predicates_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/framework/plugins/nodename:go_default_library",
"//pkg/scheduler/framework/plugins/nodeports:go_default_library",
"//pkg/scheduler/framework/plugins/noderesources:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
],
)

View File

@@ -1,121 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
v1 "k8s.io/api/core/v1"
)
var (
// The predicateName tries to be consistent as the predicate name used in DefaultAlgorithmProvider defined in
// defaults.go (which tend to be stable for backward compatibility)
// NOTE: If you add a new predicate failure error for a predicate that can never
// be made to pass by removing pods, or you change an existing predicate so that
// it can never be made to pass by removing pods, you need to add the predicate
// failure error in nodesWherePreemptionMightHelp() in scheduler/core/generic_scheduler.go
// ErrNodeSelectorNotMatch is used for MatchNodeSelector predicate error.
ErrNodeSelectorNotMatch = NewPredicateFailureError("MatchNodeSelector", "node(s) didn't match node selector")
// ErrPodNotMatchHostName is used for HostName predicate error.
ErrPodNotMatchHostName = NewPredicateFailureError("HostName", "node(s) didn't match the requested hostname")
// ErrPodNotFitsHostPorts is used for PodFitsHostPorts predicate error.
ErrPodNotFitsHostPorts = NewPredicateFailureError("PodFitsHostPorts", "node(s) didn't have free ports for the requested pod ports")
// ErrNodeUnknownCondition is used for NodeUnknownCondition predicate error.
ErrNodeUnknownCondition = NewPredicateFailureError("NodeUnknownCondition", "node(s) had unknown conditions")
)
var unresolvablePredicateFailureErrors = map[PredicateFailureReason]struct{}{
ErrNodeSelectorNotMatch: {},
ErrPodNotMatchHostName: {},
// Node conditions won't change when scheduler simulates removal of preemption victims.
// So, it is pointless to try nodes that have not been able to host the pod due to node
// conditions.
ErrNodeUnknownCondition: {},
}
// UnresolvablePredicateExists checks if there is at least one unresolvable predicate failure reason.
func UnresolvablePredicateExists(reasons []PredicateFailureReason) bool {
for _, r := range reasons {
if _, ok := unresolvablePredicateFailureErrors[r]; ok {
return true
}
}
return false
}
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
// NewInsufficientResourceError returns an InsufficientResourceError.
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
// GetReason returns the reason of the InsufficientResourceError.
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
// GetInsufficientAmount returns the amount of the insufficient resource of the error.
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.requested - (e.capacity - e.used)
}
// PredicateFailureError describes a failure error of predicate.
type PredicateFailureError struct {
PredicateName string
PredicateDesc string
}
// NewPredicateFailureError creates a PredicateFailureError with message.
func NewPredicateFailureError(predicateName, predicateDesc string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName, PredicateDesc: predicateDesc}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
// GetReason returns the reason of the PredicateFailureError.
func (e *PredicateFailureError) GetReason() string {
return e.PredicateDesc
}
// PredicateFailureReason interface represents the failure reason of a predicate.
type PredicateFailureReason interface {
GetReason() string
}
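For reference, a minimal sketch (not part of this commit) of how the error types in the deleted file above were typically consumed. The exampleUsage helper and the resource numbers are hypothetical; the constructors, ErrNodeSelectorNotMatch and UnresolvablePredicateExists are the declarations shown above, assumed to be in scope within the predicates package.
// Illustration only: hypothetical caller inside the predicates package,
// relying solely on the declarations from the deleted error.go above.
func exampleUsage() {
	// A pod asking for 1000m CPU on a node with 8000m allocatable,
	// of which 7500m is already requested by other pods.
	cpuErr := NewInsufficientResourceError(v1.ResourceCPU, 1000, 7500, 8000)
	_ = cpuErr.GetReason()             // "Insufficient cpu"
	_ = cpuErr.GetInsufficientAmount() // 1000 - (8000 - 7500) = 500

	// Preemption skips nodes whose failures cannot be resolved by evicting
	// pods, e.g. a node-selector mismatch.
	reasons := []PredicateFailureReason{ErrNodeSelectorNotMatch, cpuErr}
	_ = UnresolvablePredicateExists(reasons) // true
}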

View File

@@ -19,291 +19,78 @@ package predicates
import (
"fmt"
"k8s.io/klog"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
const (
// MatchInterPodAffinityPred defines the name of predicate MatchInterPodAffinity.
MatchInterPodAffinityPred = "MatchInterPodAffinity"
// CheckVolumeBindingPred defines the name of predicate CheckVolumeBinding.
CheckVolumeBindingPred = "CheckVolumeBinding"
// GeneralPred defines the name of predicate GeneralPredicates.
GeneralPred = "GeneralPredicates"
// HostNamePred defines the name of predicate HostName.
HostNamePred = "HostName"
// PodFitsHostPortsPred defines the name of predicate PodFitsHostPorts.
PodFitsHostPortsPred = "PodFitsHostPorts"
// MatchNodeSelectorPred defines the name of predicate MatchNodeSelector.
MatchNodeSelectorPred = "MatchNodeSelector"
// PodFitsResourcesPred defines the name of predicate PodFitsResources.
PodFitsResourcesPred = "PodFitsResources"
// NoDiskConflictPred defines the name of predicate NoDiskConflict.
NoDiskConflictPred = "NoDiskConflict"
// PodToleratesNodeTaintsPred defines the name of predicate PodToleratesNodeTaints.
PodToleratesNodeTaintsPred = "PodToleratesNodeTaints"
// CheckNodeUnschedulablePred defines the name of predicate CheckNodeUnschedulablePredicate.
CheckNodeUnschedulablePred = "CheckNodeUnschedulable"
// CheckNodeLabelPresencePred defines the name of predicate CheckNodeLabelPresence.
CheckNodeLabelPresencePred = "CheckNodeLabelPresence"
// CheckServiceAffinityPred defines the name of predicate checkServiceAffinity.
CheckServiceAffinityPred = "CheckServiceAffinity"
// MaxEBSVolumeCountPred defines the name of predicate MaxEBSVolumeCount.
// DEPRECATED
// All cloudprovider specific predicates are deprecated in favour of MaxCSIVolumeCountPred.
MaxEBSVolumeCountPred = "MaxEBSVolumeCount"
// MaxGCEPDVolumeCountPred defines the name of predicate MaxGCEPDVolumeCount.
// DEPRECATED
// All cloudprovider specific predicates are deprecated in favour of MaxCSIVolumeCountPred.
MaxGCEPDVolumeCountPred = "MaxGCEPDVolumeCount"
// MaxAzureDiskVolumeCountPred defines the name of predicate MaxAzureDiskVolumeCount.
// DEPRECATED
// All cloudprovider specific predicates are deprecated in favour of MaxCSIVolumeCountPred.
MaxAzureDiskVolumeCountPred = "MaxAzureDiskVolumeCount"
// MaxCinderVolumeCountPred defines the name of predicate MaxCinderDiskVolumeCount.
// DEPRECATED
// All cloudprovider specific predicates are deprecated in favour of MaxCSIVolumeCountPred.
MaxCinderVolumeCountPred = "MaxCinderVolumeCount"
// MaxCSIVolumeCountPred defines the predicate that decides how many CSI volumes should be attached.
MaxCSIVolumeCountPred = "MaxCSIVolumeCountPred"
// NoVolumeZoneConflictPred defines the name of predicate NoVolumeZoneConflict.
NoVolumeZoneConflictPred = "NoVolumeZoneConflict"
// EvenPodsSpreadPred defines the name of predicate EvenPodsSpread.
EvenPodsSpreadPred = "EvenPodsSpread"
)
// DEPRECATED: all the logic in this package exist only because kubelet uses it.
// IMPORTANT NOTE for predicate developers:
// We are using cached predicate result for pods belonging to the same equivalence class.
// So when updating an existing predicate, you should consider whether your change will introduce new
// dependency to attributes of any API object like Pod, Node, Service etc.
// If yes, you are expected to invalidate the cached predicate result for related API object change.
// For example:
// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
// IMPORTANT NOTE: this list contains the ordering of the predicates, if you develop a new predicate
// it is mandatory to add its name to this list.
// Otherwise it won't be processed, see generic_scheduler#podFitsOnNode().
// The order is based on the restrictiveness & complexity of predicates.
// Design doc: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/predicates-ordering.md
var (
predicatesOrdering = []string{CheckNodeUnschedulablePred,
GeneralPred, HostNamePred, PodFitsHostPortsPred,
MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred,
PodToleratesNodeTaintsPred, CheckNodeLabelPresencePred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred, MaxCSIVolumeCountPred,
MaxAzureDiskVolumeCountPred, MaxCinderVolumeCountPred, CheckVolumeBindingPred, NoVolumeZoneConflictPred,
EvenPodsSpreadPred, MatchInterPodAffinityPred}
)
// Ordering returns the ordering of predicates.
func Ordering() []string {
return predicatesOrdering
}
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
noderesources.InsufficientResource
}
// Metadata interface represents anything that can access a predicate metadata.
// DEPRECATED.
type Metadata interface{}
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
type FitPredicate func(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error)
// GetResourceRequest returns a *schedulernodeinfo.Resource that covers the largest
// width in each resource dimension. Because init-containers run sequentially, we collect
// the max in each dimension iteratively. In contrast, we sum the resource vectors for
// regular containers since they run simultaneously.
//
// If Pod Overhead is specified and the feature gate is set, the resources defined for Overhead
// are added to the calculated Resource request sum
//
// Example:
//
// Pod:
// InitContainers
// IC1:
// CPU: 2
// Memory: 1G
// IC2:
// CPU: 2
// Memory: 3G
// Containers
// C1:
// CPU: 2
// Memory: 1G
// C2:
// CPU: 1
// Memory: 1G
//
// Result: CPU: 3, Memory: 3G
func GetResourceRequest(pod *v1.Pod) *schedulernodeinfo.Resource {
result := &schedulernodeinfo.Resource{}
for _, container := range pod.Spec.Containers {
result.Add(container.Resources.Requests)
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
result.SetMaxResource(container.Resources.Requests)
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
result.Add(pod.Spec.Overhead)
}
return result
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.Requested, e.Used, e.Capacity)
}
func podName(pod *v1.Pod) string {
return pod.Namespace + "/" + pod.Name
}
// PredicateFailureReason interface represents the failure reason of a predicate.
type PredicateFailureReason interface {
GetReason() string
}
// PodFitsResources is a wrapper around PodFitsResourcesPredicate that implements FitPredicate interface.
// TODO(#85822): remove this function once predicate registration logic is deleted.
func PodFitsResources(pod *v1.Pod, _ Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
return PodFitsResourcesPredicate(pod, nil, nil, nodeInfo)
}
// GetReason returns the reason of the InsufficientResourceError.
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
// PodFitsResourcesPredicate checks if a node has sufficient resources, such as cpu, memory, gpu, opaque int resources etc to run a pod.
// First return value indicates whether a node has sufficient resources to run a pod while the second return value indicates the
// predicate failure reasons if the node has insufficient resources to run the pod
func PodFitsResourcesPredicate(pod *v1.Pod, podRequest *schedulernodeinfo.Resource, ignoredExtendedResources sets.String, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
var predicateFails []PredicateFailureReason
allowedPodNumber := nodeInfo.AllowedPodNumber()
if len(nodeInfo.Pods())+1 > allowedPodNumber {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourcePods, 1, int64(len(nodeInfo.Pods())), int64(allowedPodNumber)))
}
if ignoredExtendedResources == nil {
ignoredExtendedResources = sets.NewString()
}
if podRequest == nil {
podRequest = GetResourceRequest(pod)
}
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ScalarResources) == 0 {
return len(predicateFails) == 0, predicateFails, nil
}
allocatable := nodeInfo.AllocatableResource()
if allocatable.MilliCPU < podRequest.MilliCPU+nodeInfo.RequestedResource().MilliCPU {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceCPU, podRequest.MilliCPU, nodeInfo.RequestedResource().MilliCPU, allocatable.MilliCPU))
}
if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceMemory, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory))
}
if allocatable.EphemeralStorage < podRequest.EphemeralStorage+nodeInfo.RequestedResource().EphemeralStorage {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceEphemeralStorage, podRequest.EphemeralStorage, nodeInfo.RequestedResource().EphemeralStorage, allocatable.EphemeralStorage))
}
for rName, rQuant := range podRequest.ScalarResources {
if v1helper.IsExtendedResourceName(rName) {
// If this resource is one of the extended resources that should be
// ignored, we will skip checking it.
if ignoredExtendedResources.Has(string(rName)) {
continue
}
}
if allocatable.ScalarResources[rName] < rQuant+nodeInfo.RequestedResource().ScalarResources[rName] {
predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.ScalarResources[rName], nodeInfo.RequestedResource().ScalarResources[rName], allocatable.ScalarResources[rName]))
}
}
if klog.V(10) && len(predicateFails) == 0 {
// We explicitly don't do klog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
klog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
}
return len(predicateFails) == 0, predicateFails, nil
}
// GetInsufficientAmount returns the amount of the insufficient resource of the error.
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.Requested - (e.Capacity - e.Used)
}
// PodMatchNodeSelector checks if a pod node selector matches the node label.
func PodMatchNodeSelector(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
if pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
return true, nil, nil
}
return false, []PredicateFailureReason{ErrNodeSelectorNotMatch}, nil
}
// PredicateFailureError describes a failure error of predicate.
type PredicateFailureError struct {
PredicateName string
PredicateDesc string
}
// PodFitsHost checks if a pod spec node name matches the current node.
func PodFitsHost(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
if len(pod.Spec.NodeName) == 0 {
return true, nil, nil
}
node := nodeInfo.Node()
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
if pod.Spec.NodeName == node.Name {
return true, nil, nil
}
return false, []PredicateFailureReason{ErrPodNotMatchHostName}, nil
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
// PodFitsHostPorts is a wrapper around PodFitsHostPortsPredicate. This is needed until
// we are able to get rid of the FitPredicate function signature.
// TODO(#85822): remove this function once predicate registration logic is deleted.
func PodFitsHostPorts(pod *v1.Pod, _ Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
return PodFitsHostPortsPredicate(pod, nil, nodeInfo)
}
// PodFitsHostPortsPredicate checks if a node has free ports for the requested pod ports.
func PodFitsHostPortsPredicate(pod *v1.Pod, meta []*v1.ContainerPort, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
wantPorts := meta
if wantPorts == nil {
// Fallback to computing it.
wantPorts = schedutil.GetContainerPorts(pod)
}
if len(wantPorts) == 0 {
return true, nil, nil
}
existingPorts := nodeInfo.UsedPorts()
// try to see whether existingPorts and wantPorts will conflict or not
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return false, []PredicateFailureReason{ErrPodNotFitsHostPorts}, nil
}
}
return true, nil, nil
}
// GetReason returns the reason of the PredicateFailureError.
func (e *PredicateFailureError) GetReason() string {
return e.PredicateDesc
}
// GeneralPredicates checks a group of predicates that the kubelet cares about.
// DEPRECATED: this exist only because kubelet uses it. We should change kubelet to execute the individual predicates it requires.
func GeneralPredicates(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
var predicateFails []PredicateFailureReason
for _, predicate := range []FitPredicate{PodFitsResources, PodFitsHost, PodFitsHostPorts, PodMatchNodeSelector} {
fit, reasons, err := predicate(pod, meta, nodeInfo)
if err != nil {
return false, predicateFails, err
}
if !fit {
predicateFails = append(predicateFails, reasons...)
}
}
return len(predicateFails) == 0, predicateFails, nil
}
func GeneralPredicates(pod *v1.Pod, _ interface{}, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
if nodeInfo.Node() == nil {
return false, nil, fmt.Errorf("node not found")
}
var reasons []PredicateFailureReason
for _, r := range noderesources.Fits(pod, nodeInfo, nil) {
reasons = append(reasons, &InsufficientResourceError{InsufficientResource: r})
}
if !pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, nodeInfo.Node()) {
reasons = append(reasons, &PredicateFailureError{nodeaffinity.Name, nodeaffinity.ErrReason})
}
if !nodename.Fits(pod, nodeInfo) {
reasons = append(reasons, &PredicateFailureError{nodename.Name, nodename.ErrReason})
}
if !nodeports.Fits(pod, nodeInfo) {
reasons = append(reasons, &PredicateFailureError{nodeports.Name, nodeports.ErrReason})
}
return len(reasons) == 0, reasons, nil
}
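A rough usage sketch (not part of this commit) of the refactored GeneralPredicates, illustrating that kubelet-style callers keep the same (fit, reasons, error) contract. The admit helper and its arguments are hypothetical; it assumes the predicates package imports shown above, and that schedulernodeinfo.NewNodeInfo and SetNode behave as in pkg/scheduler/nodeinfo at this point in the tree.
// Hypothetical caller of the refactored GeneralPredicates; illustration only.
func admit(pod *v1.Pod, node *v1.Node, podsOnNode []*v1.Pod) (bool, []string, error) {
	// Build the scheduler's view of the node from the pods already assigned to it.
	nodeInfo := schedulernodeinfo.NewNodeInfo(podsOnNode...)
	if err := nodeInfo.SetNode(node); err != nil {
		return false, nil, err
	}
	fit, reasons, err := GeneralPredicates(pod, nil, nodeInfo)
	if err != nil {
		return false, nil, err
	}
	// Each reason is either an *InsufficientResourceError wrapping a
	// noderesources.InsufficientResource or a *PredicateFailureError.
	var messages []string
	for _, r := range reasons {
		messages = append(messages, r.GetReason())
	}
	return fit, messages, nil
}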

File diff suppressed because it is too large