MapReduce-like scheduler priority functions
commit 33c710adf0
parent 6fcbbe8663
@@ -67,7 +67,7 @@ func (c *CachedNodeInfo) GetNodeInfo(id string) (*api.Node, error) {
     return node.(*api.Node), nil
 }
 
-// podMetadata is a type that is passed as metadata for predicate functions
+// predicateMetadata is a type that is passed as metadata for predicate functions
 type predicateMetadata struct {
     podBestEffort bool
     podRequest    *schedulercache.Resource
@@ -17,6 +17,7 @@ limitations under the License.
 package priorities
 
 import (
+    "fmt"
     "math"
 
     "github.com/golang/glog"
@@ -28,6 +29,21 @@ import (
     "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
 
+// priorityMetadata is a type that is passed as metadata for priority functions
+type priorityMetadata struct {
+    nonZeroRequest *schedulercache.Resource
+}
+
+func PriorityMetadata(pod *api.Pod, nodes []*api.Node) interface{} {
+    // If we cannot compute metadata, just return nil
+    if pod == nil {
+        return nil
+    }
+    return &priorityMetadata{
+        nonZeroRequest: getNonZeroRequests(pod),
+    }
+}
+
 func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
     result := &schedulercache.Resource{}
     for i := range pod.Spec.Containers {
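
The hunk above computes pod-level data once in PriorityMetadata and hands it to every per-node priority call as an opaque interface{}. Below is a minimal, self-contained sketch of that pattern; all types, names, and numbers are local stand-ins for illustration, not the scheduler's real api/schedulercache types.

    package main

    import "fmt"

    // Stand-ins for the scheduler's Resource and NodeInfo types (illustrative only).
    type resource struct {
        milliCPU int64
        memory   int64
    }

    type nodeInfo struct {
        name      string
        requested resource
    }

    // priorityMetadata mirrors the idea above: pod-level data computed once per scheduling cycle.
    type priorityMetadata struct {
        nonZeroRequest resource
    }

    // computeMetadata runs once per pod, not once per node.
    func computeMetadata(podRequest resource) interface{} {
        return &priorityMetadata{nonZeroRequest: podRequest}
    }

    // scoreNode is a map-style function: it sees one node plus the shared, precomputed metadata.
    func scoreNode(meta interface{}, ni nodeInfo) int {
        m, ok := meta.(*priorityMetadata)
        if !ok {
            return 0 // neutral score if metadata is missing
        }
        free := 1000 - ni.requested.milliCPU - m.nonZeroRequest.milliCPU
        if free < 0 {
            free = 0
        }
        return int(free * 10 / 1000)
    }

    func main() {
        meta := computeMetadata(resource{milliCPU: 200})
        for _, ni := range []nodeInfo{
            {name: "node-a", requested: resource{milliCPU: 100}},
            {name: "node-b", requested: resource{milliCPU: 700}},
        } {
            fmt.Printf("%s -> %d\n", ni.name, scoreNode(meta, ni))
        }
    }
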
@@ -76,8 +92,12 @@ func calculateUsedScore(requested int64, capacity int64, node string) int64 {
 // Calculates host priority based on the amount of unused resources.
 // 'node' has information about the resources on the node.
 // 'pods' is a list of pods currently scheduled on the node.
-// TODO: Use Node() from nodeInfo instead of passing it.
-func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
+func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
+    node := nodeInfo.Node()
+    if node == nil {
+        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
+    }
+
     allocatableResources := nodeInfo.AllocatableResource()
     totalResources := *podRequests
     totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
@@ -100,7 +120,7 @@ func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource,
     return schedulerapi.HostPriority{
         Host:  node.Name,
         Score: int((cpuScore + memoryScore) / 2),
-    }
+    }, nil
 }
 
 // Calculate the resource used on a node. 'node' has information about the resources on the node.
@@ -136,13 +156,15 @@ func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, n
 // It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
 // based on the minimum of the average of the fraction of requested to capacity.
 // Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
-func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
-    podResources := getNonZeroRequests(pod)
-    list := make(schedulerapi.HostPriorityList, 0, len(nodes))
-    for _, node := range nodes {
-        list = append(list, calculateUnusedPriority(pod, podResources, node, nodeNameToInfo[node.Name]))
+func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
+    var nonZeroRequest *schedulercache.Resource
+    if priorityMeta, ok := meta.(*priorityMetadata); ok {
+        nonZeroRequest = priorityMeta.nonZeroRequest
+    } else {
+        // We couldn't parse metadata - fallback to computing it.
+        nonZeroRequest = getNonZeroRequests(pod)
     }
-    return list, nil
+    return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
 }
 
 // MostRequestedPriority is a priority function that favors nodes with most requested resources.
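
The Details comment above is the least-requested formula: each resource scores (capacity - requested) * 10 / capacity, and the host score is the average of the CPU and memory sub-scores. The sketch below restates that arithmetic with assumed numbers; leastRequestedScore is a local illustration, not the scheduler's calculateUnusedPriority itself.

    package main

    import "fmt"

    // leastRequestedScore restates the documented formula:
    // (capacity - requested) * 10 / capacity, clamped to 0 for over-requested nodes.
    func leastRequestedScore(requested, capacity int64) int64 {
        if capacity == 0 || requested > capacity {
            return 0
        }
        return (capacity - requested) * 10 / capacity
    }

    func main() {
        // Assumed node: 4000m CPU and 8GiB memory capacity, with 1000m / 2GiB already requested.
        cpuScore := leastRequestedScore(1000, 4000)   // (4000-1000)*10/4000 = 7
        memScore := leastRequestedScore(2<<30, 8<<30) // (8-2)*10/8 = 7 (integer division)
        fmt.Println("host score:", (cpuScore+memScore)/2)
    }
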
@@ -17,21 +17,21 @@ limitations under the License.
 package priorities
 
 import (
+    "fmt"
     "os/exec"
+    "path/filepath"
     "reflect"
     "sort"
-    "strconv"
     "testing"
 
+    "k8s.io/kubernetes/cmd/libs/go2idl/parser"
     "k8s.io/kubernetes/cmd/libs/go2idl/types"
     "k8s.io/kubernetes/pkg/api"
     "k8s.io/kubernetes/pkg/api/resource"
     "k8s.io/kubernetes/pkg/api/unversioned"
     "k8s.io/kubernetes/pkg/apis/extensions"
     "k8s.io/kubernetes/pkg/util/codeinspector"
-    "k8s.io/kubernetes/plugin/pkg/scheduler"
     "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
-    priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
     schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
     "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
@@ -52,126 +52,22 @@ func makeNode(node string, milliCPU, memory int64) *api.Node {
     }
 }
 
-func TestZeroRequest(t *testing.T) {
-    // A pod with no resources. We expect spreading to count it as having the default resources.
-    noResources := api.PodSpec{
-        Containers: []api.Container{
-            {},
-        },
-    }
-    noResources1 := noResources
-    noResources1.NodeName = "machine1"
-    // A pod with the same resources as a 0-request pod gets by default as its resources (for spreading).
-    small := api.PodSpec{
-        Containers: []api.Container{
-            {
-                Resources: api.ResourceRequirements{
-                    Requests: api.ResourceList{
-                        "cpu": resource.MustParse(
-                            strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"),
-                        "memory": resource.MustParse(
-                            strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)),
-                    },
-                },
-            },
-        },
-    }
-    small2 := small
-    small2.NodeName = "machine2"
-    // A larger pod.
-    large := api.PodSpec{
-        Containers: []api.Container{
-            {
-                Resources: api.ResourceRequirements{
-                    Requests: api.ResourceList{
-                        "cpu": resource.MustParse(
-                            strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"),
-                        "memory": resource.MustParse(
-                            strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)),
-                    },
-                },
-            },
-        },
-    }
-    large1 := large
-    large1.NodeName = "machine1"
-    large2 := large
-    large2.NodeName = "machine2"
-    tests := []struct {
-        pod   *api.Pod
-        pods  []*api.Pod
-        nodes []*api.Node
-        test  string
-    }{
-        // The point of these next two tests is to show you get the same priority for a zero-request pod
-        // as for a pod with the defaults requests, both when the zero-request pod is already on the machine
-        // and when the zero-request pod is the one being scheduled.
-        {
-            pod:   &api.Pod{Spec: noResources},
-            nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
-            test:  "test priority of zero-request pod with machine with zero-request pod",
-            pods: []*api.Pod{
-                {Spec: large1}, {Spec: noResources1},
-                {Spec: large2}, {Spec: small2},
-            },
-        },
-        {
-            pod:   &api.Pod{Spec: small},
-            nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
-            test:  "test priority of nonzero-request pod with machine with zero-request pod",
-            pods: []*api.Pod{
-                {Spec: large1}, {Spec: noResources1},
-                {Spec: large2}, {Spec: small2},
-            },
-        },
-        // The point of this test is to verify that we're not just getting the same score no matter what we schedule.
-        {
-            pod:   &api.Pod{Spec: large},
-            nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
-            test:  "test priority of larger pod with machine with zero-request pod",
-            pods: []*api.Pod{
-                {Spec: large1}, {Spec: noResources1},
-                {Spec: large2}, {Spec: small2},
-            },
-        },
-    }
-
-    const expectedPriority int = 25
-    for _, test := range tests {
-        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
-        list, err := scheduler.PrioritizeNodes(
-            test.pod,
-            nodeNameToInfo,
-            // This should match the configuration in defaultPriorities() in
-            // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
-            // to test what's actually in production.
-            []algorithm.PriorityConfig{
-                {Function: LeastRequestedPriority, Weight: 1},
-                {Function: BalancedResourceAllocation, Weight: 1},
-                {
-                    Function: NewSelectorSpreadPriority(
-                        algorithm.FakePodLister(test.pods),
-                        algorithm.FakeServiceLister([]api.Service{}),
-                        algorithm.FakeControllerLister([]api.ReplicationController{}),
-                        algorithm.FakeReplicaSetLister([]extensions.ReplicaSet{})),
-                    Weight: 1,
-                },
-            },
-            algorithm.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
-        if err != nil {
-            t.Errorf("unexpected error: %v", err)
-        }
-        for _, hp := range list {
-            if test.test == "test priority of larger pod with machine with zero-request pod" {
-                if hp.Score == expectedPriority {
-                    t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list)
-                }
-            } else {
-                if hp.Score != expectedPriority {
-                    t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
-                }
-            }
-        }
+func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
+    return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
+        result := make(schedulerapi.HostPriorityList, 0, len(nodes))
+        for i := range nodes {
+            hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
+            if err != nil {
+                return nil, err
+            }
+            result = append(result, hostResult)
+        }
+        if reduceFn != nil {
+            if err := reduceFn(result); err != nil {
+                return nil, err
+            }
+        }
+        return result, nil
     }
 }
 
@@ -401,7 +297,8 @@ func TestLeastRequested(t *testing.T) {
 
     for _, test := range tests {
         nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
-        list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, test.nodes)
+        lrp := priorityFunction(LeastRequestedPriorityMap, nil)
+        list, err := lrp(test.pod, nodeNameToInfo, test.nodes)
         if err != nil {
             t.Errorf("unexpected error: %v", err)
         }
@@ -1054,6 +951,29 @@ func makeImageNode(node string, status api.NodeStatus) *api.Node {
     }
 }
 
+func getPrioritySignatures() ([]*types.Signature, error) {
+    filePath := "./../types.go"
+    pkgName := filepath.Dir(filePath)
+    builder := parser.New()
+    if err := builder.AddDir(pkgName); err != nil {
+        return nil, err
+    }
+    universe, err := builder.FindTypes()
+    if err != nil {
+        return nil, err
+    }
+    signatures := []string{"PriorityFunction", "PriorityMapFunction", "PriorityReduceFunction"}
+    results := make([]*types.Signature, 0, len(signatures))
+    for _, signature := range signatures {
+        result, ok := universe[pkgName].Types[signature]
+        if !ok {
+            return nil, fmt.Errorf("%s type not defined", signature)
+        }
+        results = append(results, result.Signature)
+    }
+    return results, nil
+}
+
 func TestPrioritiesRegistered(t *testing.T) {
     var functions []*types.Type
 
@@ -1080,8 +1000,30 @@ func TestPrioritiesRegistered(t *testing.T) {
         }
     }
 
+    prioritySignatures, err := getPrioritySignatures()
+    if err != nil {
+        t.Fatalf("Couldn't get priorities signatures")
+    }
+
     // Check if all public priorities are referenced in target files.
     for _, function := range functions {
+        // Ignore functions that don't match priorities signatures.
+        signature := function.Underlying.Signature
+        match := false
+        for _, prioritySignature := range prioritySignatures {
+            if len(prioritySignature.Parameters) != len(signature.Parameters) {
+                continue
+            }
+            if len(prioritySignature.Results) != len(signature.Results) {
+                continue
+            }
+            // TODO: Check exact types of parameters and results.
+            match = true
+        }
+        if !match {
+            continue
+        }
+
         args := []string{"-rl", function.Name.Name}
         args = append(args, targetFiles...)
 
@@ -26,9 +26,24 @@ import (
 // The failure information is given by the error.
 type FitPredicate func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
 
+// PriorityMapFunction is a function that computes per-node results for a given node.
+// TODO: Figure out the exact API of this method.
+type PriorityMapFunction func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
+
+// PriorityReduceFunction is a function that aggregated per-node results and computes
+// final scores for all nodes.
+// TODO: Figure out the exact API of this method.
+type PriorityReduceFunction func(result schedulerapi.HostPriorityList) error
+
+// DEPRECATED
+// Use Map-Reduce pattern for priority functions.
 type PriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error)
 
 type PriorityConfig struct {
+    Map    PriorityMapFunction
+    Reduce PriorityReduceFunction
+    // TODO: Remove it after migrating all functions to
+    // Map-Reduce pattern.
     Function PriorityFunction
     Weight   int
 }
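
The new PriorityMapFunction and PriorityReduceFunction types split a priority into a per-node map step plus an optional whole-list reduce step; in this commit the reduce passed for LeastRequestedPriority is nil. A reduce could, for example, rescale raw scores after all nodes are mapped. The sketch below only mirrors the shapes of these types with local stand-ins; hostPriority, mapFn, reduceFn, and normalizeReduce are illustrative names, not scheduler API.

    package main

    import "fmt"

    // Local stand-ins for schedulerapi.HostPriority / HostPriorityList (illustrative only).
    type hostPriority struct {
        Host  string
        Score int
    }
    type hostPriorityList []hostPriority

    // These mirror the shapes of PriorityMapFunction and PriorityReduceFunction:
    // map scores one node at a time, reduce sees the whole list afterwards.
    type mapFn func(node string) (hostPriority, error)
    type reduceFn func(result hostPriorityList) error

    // normalizeReduce is a hypothetical reduce step that rescales raw scores to 0..10.
    func normalizeReduce(result hostPriorityList) error {
        max := 0
        for _, hp := range result {
            if hp.Score > max {
                max = hp.Score
            }
        }
        if max == 0 {
            return nil
        }
        for i := range result {
            result[i].Score = result[i].Score * 10 / max
        }
        return nil
    }

    func main() {
        var m mapFn = func(node string) (hostPriority, error) {
            return hostPriority{Host: node, Score: len(node)}, nil // toy per-node score
        }
        var r reduceFn = normalizeReduce

        result := hostPriorityList{}
        for _, n := range []string{"a", "bbb", "cc"} {
            hp, _ := m(n)
            result = append(result, hp)
        }
        _ = r(result)
        fmt.Println(result) // [{a 3} {bbb 10} {cc 6}]
    }
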
@@ -165,7 +165,7 @@ func defaultPredicates() sets.String {
 func defaultPriorities() sets.String {
     return sets.NewString(
         // Prioritize nodes by least requested utilization.
-        factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1),
+        factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1),
         // Prioritizes nodes to help achieve balanced resource usage
         factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1),
         // spreads pods by minimizing the number of pods (belonging to the same service or replication controller) on the same node.
@@ -48,13 +48,21 @@ type PluginFactoryArgs struct {
 // A FitPredicateFactory produces a FitPredicate from the given args.
 type FitPredicateFactory func(PluginFactoryArgs) algorithm.FitPredicate
 
+// DEPRECATED
+// Use Map-Reduce pattern for priority functions.
 // A PriorityFunctionFactory produces a PriorityConfig from the given args.
 type PriorityFunctionFactory func(PluginFactoryArgs) algorithm.PriorityFunction
+
+// A PriorityFunctionFactory produces map & reduce priority functions
+// from a given args.
+// FIXME: Rename to PriorityFunctionFactory.
+type PriorityFunctionFactory2 func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction)
 
 // A PriorityConfigFactory produces a PriorityConfig from the given function and weight
 type PriorityConfigFactory struct {
     Function PriorityFunctionFactory
-    Weight   int
+    MapReduceFunction PriorityFunctionFactory2
+    Weight            int
 }
 
 var (
@@ -139,6 +147,8 @@ func IsFitPredicateRegistered(name string) bool {
     return ok
 }
 
+// DEPRECATED
+// Use Map-Reduce pattern for priority functions.
 // Registers a priority function with the algorithm registry. Returns the name,
 // with which the function was registered.
 func RegisterPriorityFunction(name string, function algorithm.PriorityFunction, weight int) string {
@@ -150,6 +160,22 @@ func RegisterPriorityFunction(name string, function algorithm.PriorityFunction,
     })
 }
 
+// Registers a priority function with the algorithm registry. Returns the name,
+// with which the function was registered.
+// FIXME: Rename to PriorityFunctionFactory.
+func RegisterPriorityFunction2(
+    name string,
+    mapFunction algorithm.PriorityMapFunction,
+    reduceFunction algorithm.PriorityReduceFunction,
+    weight int) string {
+    return RegisterPriorityConfigFactory(name, PriorityConfigFactory{
+        MapReduceFunction: func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
+            return mapFunction, reduceFunction
+        },
+        Weight: weight,
+    })
+}
+
 func RegisterPriorityConfigFactory(name string, pcf PriorityConfigFactory) string {
     schedulerFactoryMutex.Lock()
     defer schedulerFactoryMutex.Unlock()
@@ -193,8 +219,9 @@ func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string {
         glog.V(2).Infof("Priority type %s already registered, reusing.", policy.Name)
         // set/update the weight based on the policy
         pcf = &PriorityConfigFactory{
             Function: existing_pcf.Function,
-            Weight:   policy.Weight,
+            MapReduceFunction: existing_pcf.MapReduceFunction,
+            Weight:            policy.Weight,
         }
     }
 
@@ -265,10 +292,19 @@ func getPriorityFunctionConfigs(names sets.String, args PluginFactoryArgs) ([]al
         if !ok {
             return nil, fmt.Errorf("Invalid priority name %s specified - no corresponding function found", name)
         }
-        configs = append(configs, algorithm.PriorityConfig{
-            Function: factory.Function(args),
-            Weight:   factory.Weight,
-        })
+        if factory.Function != nil {
+            configs = append(configs, algorithm.PriorityConfig{
+                Function: factory.Function(args),
+                Weight:   factory.Weight,
+            })
+        } else {
+            mapFunction, reduceFunction := factory.MapReduceFunction(args)
+            configs = append(configs, algorithm.PriorityConfig{
+                Map:    mapFunction,
+                Reduce: reduceFunction,
+                Weight: factory.Weight,
+            })
+        }
     }
     return configs, nil
 }
@@ -32,6 +32,7 @@ import (
     "k8s.io/kubernetes/pkg/util/workqueue"
     "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
     "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
+    "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
     schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
     "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
@@ -237,7 +238,7 @@ func PrioritizeNodes(
     nodes []*api.Node,
     extenders []algorithm.SchedulerExtender,
 ) (schedulerapi.HostPriorityList, error) {
-    result := make(schedulerapi.HostPriorityList, 0, len(nodeNameToInfo))
+    result := make(schedulerapi.HostPriorityList, 0, len(nodes))
 
     // If no priority configs are provided, then the EqualPriority function is applied
     // This is required to generate the priority list in the required format
@@ -252,6 +253,7 @@ func PrioritizeNodes(
         errs []error
     )
 
+    meta := priorities.PriorityMetadata(pod, nodes)
     for _, priorityConfig := range priorityConfigs {
         // skip the priority function if the weight is specified as 0
         if priorityConfig.Weight == 0 {
@@ -262,8 +264,26 @@ func PrioritizeNodes(
         go func(config algorithm.PriorityConfig) {
             defer wg.Done()
             weight := config.Weight
-            priorityFunc := config.Function
-            prioritizedList, err := priorityFunc(pod, nodeNameToInfo, nodes)
+
+            prioritizedList, err := func() (schedulerapi.HostPriorityList, error) {
+                if config.Function != nil {
+                    return config.Function(pod, nodeNameToInfo, nodes)
+                }
+                prioritizedList := make(schedulerapi.HostPriorityList, 0, len(nodes))
+                for i := range nodes {
+                    hostResult, err := config.Map(pod, meta, nodeNameToInfo[nodes[i].Name])
+                    if err != nil {
+                        return nil, err
+                    }
+                    prioritizedList = append(prioritizedList, hostResult)
+                }
+                if config.Reduce != nil {
+                    if err := config.Reduce(prioritizedList); err != nil {
+                        return nil, err
+                    }
+                }
+                return prioritizedList, nil
+            }()
 
             mu.Lock()
             defer mu.Unlock()
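
The hunk above makes PrioritizeNodes handle both kinds of PriorityConfig inside the same goroutine: a legacy whole-list Function, or a Map applied once per node followed by an optional Reduce. Below is a standalone sketch of that dispatch using simplified local types rather than the scheduler's own; runPriority, priorityConfig, and hostPriority are illustrative names.

    package main

    import "fmt"

    // Minimal stand-ins for the scheduler's result and config types (illustrative only).
    type hostPriority struct {
        Host  string
        Score int
    }
    type hostPriorityList []hostPriority

    type priorityConfig struct {
        // Legacy whole-list function; takes precedence when set.
        Function func(nodes []string) (hostPriorityList, error)
        // Map/Reduce pair used when Function is nil.
        Map    func(meta interface{}, node string) (hostPriority, error)
        Reduce func(result hostPriorityList) error
    }

    // runPriority mirrors the dispatch added to PrioritizeNodes: prefer the legacy
    // Function, otherwise run Map once per node and then the optional Reduce.
    func runPriority(cfg priorityConfig, meta interface{}, nodes []string) (hostPriorityList, error) {
        if cfg.Function != nil {
            return cfg.Function(nodes)
        }
        result := make(hostPriorityList, 0, len(nodes))
        for _, n := range nodes {
            hp, err := cfg.Map(meta, n)
            if err != nil {
                return nil, err
            }
            result = append(result, hp)
        }
        if cfg.Reduce != nil {
            if err := cfg.Reduce(result); err != nil {
                return nil, err
            }
        }
        return result, nil
    }

    func main() {
        cfg := priorityConfig{
            Map: func(_ interface{}, node string) (hostPriority, error) {
                return hostPriority{Host: node, Score: 5}, nil
            },
        }
        list, err := runPriority(cfg, nil, []string{"node-a", "node-b"})
        fmt.Println(list, err) // [{node-a 5} {node-b 5}] <nil>
    }

With Function unset, the map/reduce path runs; with it set, the legacy path is used unchanged, which is what lets the commit migrate priorities one at a time.
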
@@ -277,13 +297,12 @@ func PrioritizeNodes(
             }
         }(priorityConfig)
     }
+    // wait for all go routines to finish
+    wg.Wait()
     if len(errs) != 0 {
         return schedulerapi.HostPriorityList{}, errors.NewAggregate(errs)
     }
 
-    // wait for all go routines to finish
-    wg.Wait()
-
     if len(extenders) != 0 && nodes != nil {
         for _, extender := range extenders {
             wg.Add(1)
@@ -25,10 +25,14 @@ import (
     "time"
 
     "k8s.io/kubernetes/pkg/api"
+    "k8s.io/kubernetes/pkg/api/resource"
+    "k8s.io/kubernetes/pkg/apis/extensions"
     "k8s.io/kubernetes/pkg/util/sets"
     "k8s.io/kubernetes/pkg/util/wait"
     "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
     algorithmpredicates "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
+    algorithmpriorities "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
+    priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
     schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
     "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
@@ -372,3 +376,146 @@ func TestFindFitSomeError(t *testing.T) {
         }
     }
 }
+
+func makeNode(node string, milliCPU, memory int64) *api.Node {
+    return &api.Node{
+        ObjectMeta: api.ObjectMeta{Name: node},
+        Status: api.NodeStatus{
+            Capacity: api.ResourceList{
+                "cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+                "memory": *resource.NewQuantity(memory, resource.BinarySI),
+            },
+            Allocatable: api.ResourceList{
+                "cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+                "memory": *resource.NewQuantity(memory, resource.BinarySI),
+            },
+        },
+    }
+}
+
+// The point of this test is to show that you:
+// - get the same priority for a zero-request pod as for a pod with the defaults requests,
+// both when the zero-request pod is already on the machine and when the zero-request pod
+// is the one being scheduled.
+// - don't get the same score no matter what we schedule.
+func TestZeroRequest(t *testing.T) {
+    // A pod with no resources. We expect spreading to count it as having the default resources.
+    noResources := api.PodSpec{
+        Containers: []api.Container{
+            {},
+        },
+    }
+    noResources1 := noResources
+    noResources1.NodeName = "machine1"
+    // A pod with the same resources as a 0-request pod gets by default as its resources (for spreading).
+    small := api.PodSpec{
+        Containers: []api.Container{
+            {
+                Resources: api.ResourceRequirements{
+                    Requests: api.ResourceList{
+                        "cpu": resource.MustParse(
+                            strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"),
+                        "memory": resource.MustParse(
+                            strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)),
+                    },
+                },
+            },
+        },
+    }
+    small2 := small
+    small2.NodeName = "machine2"
+    // A larger pod.
+    large := api.PodSpec{
+        Containers: []api.Container{
+            {
+                Resources: api.ResourceRequirements{
+                    Requests: api.ResourceList{
+                        "cpu": resource.MustParse(
+                            strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"),
+                        "memory": resource.MustParse(
+                            strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)),
+                    },
+                },
+            },
+        },
+    }
+    large1 := large
+    large1.NodeName = "machine1"
+    large2 := large
+    large2.NodeName = "machine2"
+    tests := []struct {
+        pod   *api.Pod
+        pods  []*api.Pod
+        nodes []*api.Node
+        test  string
+    }{
+        // The point of these next two tests is to show you get the same priority for a zero-request pod
+        // as for a pod with the defaults requests, both when the zero-request pod is already on the machine
+        // and when the zero-request pod is the one being scheduled.
+        {
+            pod:   &api.Pod{Spec: noResources},
+            nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
+            test:  "test priority of zero-request pod with machine with zero-request pod",
+            pods: []*api.Pod{
+                {Spec: large1}, {Spec: noResources1},
+                {Spec: large2}, {Spec: small2},
+            },
+        },
+        {
+            pod:   &api.Pod{Spec: small},
+            nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
+            test:  "test priority of nonzero-request pod with machine with zero-request pod",
+            pods: []*api.Pod{
+                {Spec: large1}, {Spec: noResources1},
+                {Spec: large2}, {Spec: small2},
+            },
+        },
+        // The point of this test is to verify that we're not just getting the same score no matter what we schedule.
+        {
+            pod:   &api.Pod{Spec: large},
+            nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
+            test:  "test priority of larger pod with machine with zero-request pod",
+            pods: []*api.Pod{
+                {Spec: large1}, {Spec: noResources1},
+                {Spec: large2}, {Spec: small2},
+            },
+        },
+    }
+
+    const expectedPriority int = 25
+    for _, test := range tests {
+        // This should match the configuration in defaultPriorities() in
+        // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
+        // to test what's actually in production.
+        priorityConfigs := []algorithm.PriorityConfig{
+            {Map: algorithmpriorities.LeastRequestedPriorityMap, Weight: 1},
+            {Function: algorithmpriorities.BalancedResourceAllocation, Weight: 1},
+            {
+                Function: algorithmpriorities.NewSelectorSpreadPriority(
+                    algorithm.FakePodLister(test.pods),
+                    algorithm.FakeServiceLister([]api.Service{}),
+                    algorithm.FakeControllerLister([]api.ReplicationController{}),
+                    algorithm.FakeReplicaSetLister([]extensions.ReplicaSet{})),
+                Weight: 1,
+            },
+        }
+        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
+        list, err := PrioritizeNodes(
+            test.pod, nodeNameToInfo, priorityConfigs,
+            algorithm.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
+        if err != nil {
+            t.Errorf("unexpected error: %v", err)
+        }
+        for _, hp := range list {
+            if test.test == "test priority of larger pod with machine with zero-request pod" {
+                if hp.Score == expectedPriority {
+                    t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list)
+                }
+            } else {
+                if hp.Score != expectedPriority {
+                    t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
+                }
+            }
+        }
+    }
+}