Mirror of https://github.com/k3s-io/kubernetes.git
Migrate priority functions with non-trivial reduce function.
Commit 4ccb27202c (parent e02b73ff67)
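
The hunks below touch several scheduler source files at once: the NodeAffinity and TaintToleration priorities and their tests in package priorities, the priority metadata helper, the PriorityMapFunction/PriorityReduceFunction types in package algorithm, the default provider registration, and PrioritizeNodes in the generic scheduler. The common change is splitting each monolithic all-nodes priority function into a per-node map step plus a list-wide reduce step. The standalone sketch below (stand-in types only, not the scheduler packages themselves) illustrates that control flow: the map step scores each node independently, and the reduce step then rescales the whole list into the 0-10 range the scheduler expects.

package main

import "fmt"

// hostPriority stands in for schedulerapi.HostPriority; everything in this
// file is a simplified sketch of the map/reduce split, not scheduler code.
type hostPriority struct {
    Host  string
    Score int
}

// mapScore plays the role of a PriorityMapFunction: it scores one node at a
// time, here from an already-computed matched weight for that node.
func mapScore(host string, matchedWeight int) hostPriority {
    return hostPriority{Host: host, Score: matchedWeight}
}

// reduceScores plays the role of a PriorityReduceFunction: it sees the whole
// list at once and rescales the raw per-node counts into the 0-10 range.
func reduceScores(result []hostPriority) {
    maxCount := 0
    for i := range result {
        if result[i].Score > maxCount {
            maxCount = result[i].Score
        }
    }
    for i := range result {
        if maxCount > 0 {
            result[i].Score = int(10 * float64(result[i].Score) / float64(maxCount))
        } else {
            result[i].Score = 0
        }
    }
}

func main() {
    result := []hostPriority{mapScore("node-a", 2), mapScore("node-b", 8)}
    reduceScores(result)
    fmt.Println(result) // [{node-a 2} {node-b 10}]
}
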
@@ -17,6 +17,8 @@ limitations under the License.
 package priorities
 
 import (
+    "fmt"
+
     "github.com/golang/glog"
     "k8s.io/kubernetes/pkg/api"
     "k8s.io/kubernetes/pkg/labels"
@@ -29,55 +31,75 @@ import (
 // it will a get an add of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms
 // the node satisfies and the more the preferredSchedulingTerm that is satisfied weights, the higher
 // score the node gets.
-func CalculateNodeAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
-    var maxCount float64
-    counts := make(map[string]float64, len(nodes))
-
-    affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
-    if err != nil {
-        return nil, err
-    }
-
+func CalculateNodeAffinityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
+    node := nodeInfo.Node()
+    if node == nil {
+        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
+    }
+
+    var affinity *api.Affinity
+    if priorityMeta, ok := meta.(*priorityMetadata); ok {
+        affinity = priorityMeta.affinity
+    } else {
+        // We couldn't parse metadata - fallback to computing it.
+        var err error
+        affinity, err = api.GetAffinityFromPodAnnotations(pod.Annotations)
+        if err != nil {
+            return schedulerapi.HostPriority{}, err
+        }
+    }
+
+    var count int32
     // A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
     // An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
     // empty PreferredSchedulingTerm matches all objects.
     if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
         // Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
-        for _, preferredSchedulingTerm := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
+        for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
+            preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
             if preferredSchedulingTerm.Weight == 0 {
                 continue
             }
 
+            // TODO: Avoid computing it for all nodes if this becomes a performance problem.
             nodeSelector, err := api.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
             if err != nil {
-                return nil, err
+                return schedulerapi.HostPriority{}, err
             }
 
-            for _, node := range nodes {
-                if nodeSelector.Matches(labels.Set(node.Labels)) {
-                    counts[node.Name] += float64(preferredSchedulingTerm.Weight)
-                }
-
-                if counts[node.Name] > maxCount {
-                    maxCount = counts[node.Name]
-                }
+            if nodeSelector.Matches(labels.Set(node.Labels)) {
+                count += preferredSchedulingTerm.Weight
             }
         }
     }
 
-    result := make(schedulerapi.HostPriorityList, 0, len(nodes))
-    for _, node := range nodes {
-        if maxCount > 0 {
-            fScore := 10 * (counts[node.Name] / maxCount)
-            result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
-            if glog.V(10) {
-                // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
-                // not logged. There is visible performance gain from it.
-                glog.Infof("%v -> %v: NodeAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
-            }
-        } else {
-            result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: 0})
-        }
-    }
-    return result, nil
+    return schedulerapi.HostPriority{
+        Host:  node.Name,
+        Score: int(count),
+    }, nil
+}
+
+func CalculateNodeAffinityPriorityReduce(pod *api.Pod, result schedulerapi.HostPriorityList) error {
+    var maxCount int
+    for i := range result {
+        if result[i].Score > maxCount {
+            maxCount = result[i].Score
+        }
+    }
+    maxCountFloat := float64(maxCount)
+
+    var fScore float64
+    for i := range result {
+        if maxCount > 0 {
+            fScore = 10 * (float64(result[i].Score) / maxCountFloat)
+        } else {
+            fScore = 0
+        }
+        if glog.V(10) {
+            // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
+            // not logged. There is visible performance gain from it.
+            glog.Infof("%v -> %v: NodeAffinityPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
+        }
+        result[i].Score = int(fScore)
+    }
+    return nil
 }
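
To make the new reduce step concrete: if the map step returned summed preferred-term weights of 3, 6 and 0 for three nodes (hypothetical numbers), CalculateNodeAffinityPriorityReduce finds maxCount = 6 and rewrites the scores in place to 10*(3/6) = 5, 10*(6/6) = 10 and 0, so the best-matching node always lands on 10 and a node matching nothing stays at 0, the same scale the old single-pass function produced.
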
@@ -155,7 +155,9 @@ func TestNodeAffinityPriority(t *testing.T) {
     }
 
     for _, test := range tests {
-        list, err := CalculateNodeAffinityPriority(test.pod, schedulercache.CreateNodeNameToInfoMap(nil, test.nodes), test.nodes)
+        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
+        nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce)
+        list, err := nap(test.pod, nodeNameToInfo, test.nodes)
         if err != nil {
             t.Errorf("unexpected error: %v", err)
         }
@@ -32,15 +32,27 @@ import (
 // priorityMetadata is a type that is passed as metadata for priority functions
 type priorityMetadata struct {
     nonZeroRequest *schedulercache.Resource
+    podTolerations []api.Toleration
+    affinity       *api.Affinity
 }
 
-func PriorityMetadata(pod *api.Pod, nodes []*api.Node) interface{} {
+func PriorityMetadata(pod *api.Pod) interface{} {
     // If we cannot compute metadata, just return nil
     if pod == nil {
         return nil
     }
+    tolerations, err := getTolerationListFromPod(pod)
+    if err != nil {
+        return nil
+    }
+    affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
+    if err != nil {
+        return nil
+    }
     return &priorityMetadata{
         nonZeroRequest: getNonZeroRequests(pod),
+        podTolerations: tolerations,
+        affinity:       affinity,
     }
 }
 
@@ -61,7 +61,7 @@ func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.Pr
         result = append(result, hostResult)
     }
     if reduceFn != nil {
-        if err := reduceFn(result); err != nil {
+        if err := reduceFn(pod, result); err != nil {
             return nil, err
         }
     }
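
Only two changed lines of the priorityFunction test helper are visible in the hunk above. For orientation, here is a hypothetical reconstruction of such an adapter, built only from the signatures that do appear in this diff (the deprecated all-nodes priority shape and the new map/reduce types); the body outside the two shown lines is an assumption, not a quote of the source, and it relies on imports already present in that file.

// Hypothetical sketch: wraps a map/reduce pair in the deprecated all-nodes
// priority signature so existing tests can call it in one shot.
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
    return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
        result := make(schedulerapi.HostPriorityList, 0, len(nodes))
        for i := range nodes {
            // Tests pass nil metadata, so map functions fall back to computing it.
            hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
            if err != nil {
                return nil, err
            }
            result = append(result, hostResult)
        }
        if reduceFn != nil {
            if err := reduceFn(pod, result); err != nil {
                return nil, err
            }
        }
        return result, nil
    }
}
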
@@ -17,6 +17,8 @@ limitations under the License.
 package priorities
 
 import (
+    "fmt"
+
     "github.com/golang/glog"
     "k8s.io/kubernetes/pkg/api"
     schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
@@ -24,7 +26,7 @@ import (
 )
 
 // CountIntolerableTaintsPreferNoSchedule gives the count of intolerable taints of a pod with effect PreferNoSchedule
-func countIntolerableTaintsPreferNoSchedule(taints []api.Taint, tolerations []api.Toleration) (intolerableTaints float64) {
+func countIntolerableTaintsPreferNoSchedule(taints []api.Taint, tolerations []api.Toleration) (intolerableTaints int) {
     for i := range taints {
         taint := &taints[i]
         // check only on taints that have effect PreferNoSchedule
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// ComputeTaintTolerationPriority prepares the priority list for all the nodes based on the number of intolerable taints on the node
|
func getTolerationListFromPod(pod *api.Pod) ([]api.Toleration, error) {
|
||||||
func ComputeTaintTolerationPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
|
|
||||||
// the max value of counts
|
|
||||||
var maxCount float64
|
|
||||||
// counts hold the count of intolerable taints of a pod for a given node
|
|
||||||
counts := make(map[string]float64, len(nodes))
|
|
||||||
|
|
||||||
tolerations, err := api.GetTolerationsFromPodAnnotations(pod.Annotations)
|
tolerations, err := api.GetTolerationsFromPodAnnotations(pod.Annotations)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// Fetch a list of all toleration with effect PreferNoSchedule
|
return getAllTolerationPreferNoSchedule(tolerations), nil
|
||||||
tolerationList := getAllTolerationPreferNoSchedule(tolerations)
|
}
|
||||||
|
|
||||||
|
// ComputeTaintTolerationPriority prepares the priority list for all the nodes based on the number of intolerable taints on the node
|
||||||
|
func ComputeTaintTolerationPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||||
|
node := nodeInfo.Node()
|
||||||
|
if node == nil {
|
||||||
|
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
var tolerationList []api.Toleration
|
||||||
|
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||||
|
tolerationList = priorityMeta.podTolerations
|
||||||
|
} else {
|
||||||
|
var err error
|
||||||
|
tolerationList, err = getTolerationListFromPod(pod)
|
||||||
|
if err != nil {
|
||||||
|
return schedulerapi.HostPriority{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// calculate the intolerable taints for all the nodes
|
|
||||||
for _, node := range nodes {
|
|
||||||
taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
|
taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return schedulerapi.HostPriority{}, err
|
||||||
|
}
|
||||||
|
return schedulerapi.HostPriority{
|
||||||
|
Host: node.Name,
|
||||||
|
Score: countIntolerableTaintsPreferNoSchedule(taints, tolerationList),
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
count := countIntolerableTaintsPreferNoSchedule(taints, tolerationList)
|
func ComputeTaintTolerationPriorityReduce(pod *api.Pod, result schedulerapi.HostPriorityList) error {
|
||||||
if count > 0 {
|
var maxCount int
|
||||||
// 0 is default value, so avoid unnecessary map operations.
|
for i := range result {
|
||||||
counts[node.Name] = count
|
if result[i].Score > maxCount {
|
||||||
if count > maxCount {
|
maxCount = result[i].Score
|
||||||
maxCount = count
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
maxCountFloat := float64(maxCount)
|
||||||
|
|
||||||
// The maximum priority value to give to a node
|
// The maximum priority value to give to a node
|
||||||
// Priority values range from 0 - maxPriority
|
// Priority values range from 0 - maxPriority
|
||||||
const maxPriority = float64(10)
|
const maxPriority = float64(10)
|
||||||
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
|
for i := range result {
|
||||||
for _, node := range nodes {
|
|
||||||
fScore := maxPriority
|
fScore := maxPriority
|
||||||
if maxCount > 0 {
|
if maxCountFloat > 0 {
|
||||||
fScore = (1.0 - counts[node.Name]/maxCount) * 10
|
fScore = (1.0 - float64(result[i].Score)/maxCountFloat) * 10
|
||||||
}
|
}
|
||||||
if glog.V(10) {
|
if glog.V(10) {
|
||||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||||
// not logged. There is visible performance gain from it.
|
// not logged. There is visible performance gain from it.
|
||||||
glog.Infof("%v -> %v: Taint Toleration Priority, Score: (%d)", pod.Name, node.Name, int(fScore))
|
glog.Infof("%v -> %v: Taint Toleration Priority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
|
||||||
}
|
}
|
||||||
|
result[i].Score = int(fScore)
|
||||||
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
|
|
||||||
}
|
}
|
||||||
return result, nil
|
return nil
|
||||||
}
|
}
|
||||||
|
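
The taint-toleration reduce step inverts the raw counts, so nodes with more intolerable PreferNoSchedule taints end up with lower scores. A minimal, self-contained sketch of just that arithmetic, using plain ints and hypothetical counts instead of the scheduler types:

package main

import "fmt"

func main() {
    // Intolerable-taint counts per node, as the map step would produce them.
    counts := []int{0, 1, 4}
    maxCount := 0
    for _, c := range counts {
        if c > maxCount {
            maxCount = c
        }
    }
    const maxPriority = 10.0
    scores := make([]int, len(counts))
    for i, c := range counts {
        fScore := maxPriority
        if maxCount > 0 {
            fScore = (1.0 - float64(c)/float64(maxCount)) * 10
        }
        scores[i] = int(fScore)
    }
    fmt.Println(scores) // [10 7 0]: fewer intolerable taints, higher score
}
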
@@ -210,8 +210,9 @@ func TestTaintAndToleration(t *testing.T) {
         },
     }
     for _, test := range tests {
-        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, nil)
-        list, err := ComputeTaintTolerationPriority(test.pod, nodeNameToInfo, test.nodes)
+        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
+        ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce)
+        list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
         if err != nil {
             t.Errorf("%s, unexpected error: %v", test.test, err)
         }
@@ -24,16 +24,19 @@ import (
 
 // FitPredicate is a function that indicates if a pod fits into an existing node.
 // The failure information is given by the error.
+// TODO: Change interface{} to a specific type.
 type FitPredicate func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
 
 // PriorityMapFunction is a function that computes per-node results for a given node.
 // TODO: Figure out the exact API of this method.
+// TODO: Change interface{} to a specific type.
 type PriorityMapFunction func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
 
 // PriorityReduceFunction is a function that aggregated per-node results and computes
 // final scores for all nodes.
 // TODO: Figure out the exact API of this method.
-type PriorityReduceFunction func(result schedulerapi.HostPriorityList) error
+// TODO: Change interface{} to a specific type.
+type PriorityReduceFunction func(pod *api.Pod, result schedulerapi.HostPriorityList) error
 
 // DEPRECATED
 // Use Map-Reduce pattern for priority functions.
@@ -181,8 +181,8 @@ func defaultPriorities() sets.String {
         // Set this weight large enough to override all other priority functions.
         // TODO: Figure out a better way to do this, maybe at same time as fixing #24720.
         factory.RegisterPriorityFunction2("NodePreferAvoidPodsPriority", priorities.CalculateNodePreferAvoidPodsPriorityMap, nil, 10000),
-        factory.RegisterPriorityFunction("NodeAffinityPriority", priorities.CalculateNodeAffinityPriority, 1),
-        factory.RegisterPriorityFunction("TaintTolerationPriority", priorities.ComputeTaintTolerationPriority, 1),
+        factory.RegisterPriorityFunction2("NodeAffinityPriority", priorities.CalculateNodeAffinityPriorityMap, priorities.CalculateNodeAffinityPriorityReduce, 1),
+        factory.RegisterPriorityFunction2("TaintTolerationPriority", priorities.ComputeTaintTolerationPriorityMap, priorities.ComputeTaintTolerationPriorityReduce, 1),
         // pods should be placed in the same topological domain (e.g. same node, same rack, same zone, same power domain, etc.)
         // as some other pods, or, conversely, should not be placed in the same topological domain as some other pods.
         factory.RegisterPriorityConfigFactory(
@@ -255,7 +255,7 @@ func PrioritizeNodes(
         errs = append(errs, err)
     }
 
-    meta := priorities.PriorityMetadata(pod, nodes)
+    meta := priorities.PriorityMetadata(pod)
     results := make([]schedulerapi.HostPriorityList, 0, len(priorityConfigs))
     for range priorityConfigs {
         results = append(results, nil)
@@ -298,7 +298,7 @@ func PrioritizeNodes(
         wg.Add(1)
         go func(index int, config algorithm.PriorityConfig) {
             defer wg.Done()
-            if err := config.Reduce(results[index]); err != nil {
+            if err := config.Reduce(pod, results[index]); err != nil {
                 appendError(err)
             }
         }(i, priorityConfig)