From cab7db3a64d6931b807b190de2949900721e2ad3 Mon Sep 17 00:00:00 2001 From: Wojciech Tyczynski Date: Tue, 19 Jul 2016 14:35:43 +0200 Subject: [PATCH 1/2] Use []*api.Node instead of NodeLister in priority functions. --- .../algorithm/priorities/interpod_affinity.go | 6 +-- .../priorities/interpod_affinity_test.go | 6 +-- .../algorithm/priorities/node_affinity.go | 8 +--- .../priorities/node_affinity_test.go | 3 +- .../algorithm/priorities/priorities.go | 40 ++++--------------- .../algorithm/priorities/priorities_test.go | 10 ++--- .../priorities/selector_spreading.go | 14 +------ .../priorities/selector_spreading_test.go | 6 +-- .../algorithm/priorities/taint_toleration.go | 8 +--- .../priorities/taint_toleration_test.go | 6 +-- plugin/pkg/scheduler/algorithm/types.go | 2 +- plugin/pkg/scheduler/extender_test.go | 7 +--- plugin/pkg/scheduler/factory/factory_test.go | 5 +-- plugin/pkg/scheduler/generic_scheduler.go | 31 ++++++-------- .../pkg/scheduler/generic_scheduler_test.go | 15 +++---- 15 files changed, 47 insertions(+), 120 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity.go b/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity.go index b506a1b1f0f..e68593ebfc0 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity.go +++ b/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity.go @@ -72,11 +72,7 @@ func podMatchesNamespaceAndSelector(pod *api.Pod, affinityPod *api.Pod, term *ap // that node; the node(s) with the highest sum are the most preferred. // Symmetry need to be considered for preferredDuringSchedulingIgnoredDuringExecution from podAffinity & podAntiAffinity, // symmetry need to be considered for hard requirements from podAffinity -func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } +func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { allPods, err := ipa.podLister.List(labels.Everything()) if err != nil { return nil, err diff --git a/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity_test.go b/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity_test.go index f141c121480..aaed1661cd7 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity_test.go @@ -504,7 +504,7 @@ func TestInterPodAffinityPriority(t *testing.T) { hardPodAffinityWeight: api.DefaultHardPodAffinitySymmetricWeight, failureDomains: priorityutil.Topologies{DefaultKeys: strings.Split(api.DefaultFailureDomains, ",")}, } - list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(test.nodes)) + list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -592,7 +592,7 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) { podLister: algorithm.FakePodLister(test.pods), hardPodAffinityWeight: test.hardPodAffinityWeight, } - list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(test.nodes)) + list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes) if err 
!= nil { t.Errorf("unexpected error: %v", err) } @@ -677,7 +677,7 @@ func TestSoftPodAntiAffinityWithFailureDomains(t *testing.T) { hardPodAffinityWeight: api.DefaultHardPodAffinitySymmetricWeight, failureDomains: test.failureDomains, } - list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(test.nodes)) + list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_affinity.go b/plugin/pkg/scheduler/algorithm/priorities/node_affinity.go index 80c9da52d03..53b8b87ffc1 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/node_affinity.go +++ b/plugin/pkg/scheduler/algorithm/priorities/node_affinity.go @@ -20,7 +20,6 @@ import ( "github.com/golang/glog" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/labels" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -30,12 +29,7 @@ import ( // it will a get an add of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms // the node satisfies and the more the preferredSchedulingTerm that is satisfied weights, the higher // score the node gets. -func CalculateNodeAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - +func CalculateNodeAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { var maxCount float64 counts := make(map[string]float64, len(nodes)) diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go b/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go index a3f1595185a..b6bf33c626f 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go @@ -21,7 +21,6 @@ import ( "testing" "k8s.io/kubernetes/pkg/api" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -156,7 +155,7 @@ func TestNodeAffinityPriority(t *testing.T) { } for _, test := range tests { - list, err := CalculateNodeAffinityPriority(test.pod, schedulercache.CreateNodeNameToInfoMap(nil), algorithm.FakeNodeLister(test.nodes)) + list, err := CalculateNodeAffinityPriority(test.pod, schedulercache.CreateNodeNameToInfoMap(nil), test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index b0da76dd337..4862453aecc 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -86,12 +86,7 @@ func calculateResourceOccupancy(pod *api.Pod, podRequests *schedulercache.Resour // It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes // based on the minimum of the average of the fraction of requested to capacity. 
// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2 -func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return schedulerapi.HostPriorityList{}, err - } - +func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { podResources := getNonZeroRequests(pod) list := make(schedulerapi.HostPriorityList, 0, len(nodes)) for _, node := range nodes { @@ -116,13 +111,8 @@ func NewNodeLabelPriority(label string, presence bool) algorithm.PriorityFunctio // CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value. // If presence is true, prioritizes nodes that have the specified label, regardless of value. // If presence is false, prioritizes nodes that do not have the specified label. -func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { +func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { var score int - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - labeledNodes := map[string]bool{} for _, node := range nodes { exists := labels.Set(node.Labels).Has(n.label) @@ -155,14 +145,8 @@ const ( // based on the total size of those images. // - If none of the images are present, this node will be given the lowest priority. // - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority. -func ImageLocalityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { +func ImageLocalityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { sumSizeMap := make(map[string]int64) - - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - for i := range pod.Spec.Containers { for _, node := range nodes { // Check if this container's image is present and get its size. @@ -221,12 +205,7 @@ func calculateScoreFromSize(sumSize int64) int { // close the two metrics are to each other. // Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by: // "Wei Huang et al. 
An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization" -func BalancedResourceAllocation(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return schedulerapi.HostPriorityList{}, err - } - +func BalancedResourceAllocation(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { podResources := getNonZeroRequests(pod) list := make(schedulerapi.HostPriorityList, 0, len(nodes)) for _, node := range nodes { @@ -294,15 +273,10 @@ func NewNodePreferAvoidPodsPriority(controllerLister algorithm.ControllerLister, return nodePreferAvoid.CalculateNodePreferAvoidPodsPriority } -func (npa *NodePreferAvoidPod) CalculateNodePreferAvoidPodsPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - +func (npa *NodePreferAvoidPod) CalculateNodePreferAvoidPodsPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { // TODO: Once we have ownerReference fully implemented, use it to find controller for the pod. - rcs, err := npa.controllerLister.GetPodControllers(pod) - rss, err := npa.replicaSetLister.GetPodReplicaSets(pod) + rcs, _ := npa.controllerLister.GetPodControllers(pod) + rss, _ := npa.replicaSetLister.GetPodReplicaSets(pod) if len(rcs) == 0 && len(rss) == 0 { result := make(schedulerapi.HostPriorityList, 0, len(nodes)) for _, node := range nodes { diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index 34e66da56ab..898797f9e6e 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -402,7 +402,7 @@ func TestLeastRequested(t *testing.T) { } nodeNameToInfo[node.Name].SetNode(node) } - list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(test.nodes)) + list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -496,7 +496,7 @@ func TestNewNodeLabelPriority(t *testing.T) { label: test.label, presence: test.presence, } - list, err := prioritizer.CalculateNodeLabelPriority(nil, map[string]*schedulercache.NodeInfo{}, algorithm.FakeNodeLister(test.nodes)) + list, err := prioritizer.CalculateNodeLabelPriority(nil, map[string]*schedulercache.NodeInfo{}, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -741,7 +741,7 @@ func TestBalancedResourceAllocation(t *testing.T) { } nodeNameToInfo[node.Name].SetNode(node) } - list, err := BalancedResourceAllocation(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(test.nodes)) + list, err := BalancedResourceAllocation(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -885,7 +885,7 @@ func TestImageLocalityPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods) - list, err := ImageLocalityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(test.nodes)) + list, err := ImageLocalityPriority(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: 
%v", err) } @@ -1071,7 +1071,7 @@ func TestNodePreferAvoidPriority(t *testing.T) { controllerLister: algorithm.FakeControllerLister(test.rcs), replicaSetLister: algorithm.FakeReplicaSetLister(test.rss), } - list, err := prioritizer.CalculateNodePreferAvoidPodsPriority(test.pod, map[string]*schedulercache.NodeInfo{}, algorithm.FakeNodeLister(test.nodes)) + list, err := prioritizer.CalculateNodePreferAvoidPodsPriority(test.pod, map[string]*schedulercache.NodeInfo{}, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go index 9de1bd392c2..c2331f3607d 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go +++ b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go @@ -61,12 +61,7 @@ func NewSelectorSpreadPriority(podLister algorithm.PodLister, serviceLister algo // i.e. it pushes the scheduler towards a node where there's the smallest number of // pods which match the same service selectors or RC selectors as the pod being scheduled. // Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods. -func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - +func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { selectors := make([]labels.Selector, 0, 3) if services, err := s.serviceLister.GetPodServices(pod); err == nil { for _, service := range services { @@ -193,13 +188,8 @@ func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister // CalculateAntiAffinityPriority spreads pods by minimizing the number of pods belonging to the same service // on machines with the same value for a particular label. // The label to be considered is provided to the struct (ServiceAntiAffinity). 
-func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { +func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { var nsServicePods []*api.Pod - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - if services, err := s.serviceLister.GetPodServices(pod); err == nil { // just use the first service and get the other pods within the service // TODO: a separate predicate can be created that tries to handle all services for the pod diff --git a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go index 034500b2d06..5f8c3f2ab61 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go @@ -278,7 +278,7 @@ func TestSelectorSpreadPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods) selectorSpread := SelectorSpread{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs), replicaSetLister: algorithm.FakeReplicaSetLister(test.rss)} - list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(makeNodeList(test.nodes))) + list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeNodeList(test.nodes)) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -479,7 +479,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods) selectorSpread := SelectorSpread{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs), replicaSetLister: algorithm.FakeReplicaSetLister(test.rss)} - list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(makeLabeledNodeList(labeledNodes))) + list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes)) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -651,7 +651,7 @@ func TestZoneSpreadPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods) zoneSpread := ServiceAntiAffinity{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), label: "zone"} - list, err := zoneSpread.CalculateAntiAffinityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(makeLabeledNodeList(test.nodes))) + list, err := zoneSpread.CalculateAntiAffinityPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes)) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/taint_toleration.go b/plugin/pkg/scheduler/algorithm/priorities/taint_toleration.go index 8e6364c7c63..b299044d212 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/taint_toleration.go +++ b/plugin/pkg/scheduler/algorithm/priorities/taint_toleration.go @@ -19,7 +19,6 @@ package priorities import ( "github.com/golang/glog" 
"k8s.io/kubernetes/pkg/api" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -52,12 +51,7 @@ func getAllTolerationPreferNoSchedule(tolerations []api.Toleration) (tolerationL } // ComputeTaintTolerationPriority prepares the priority list for all the nodes based on the number of intolerable taints on the node -func ComputeTaintTolerationPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return nil, err - } - +func ComputeTaintTolerationPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { // the max value of counts var maxCount float64 // counts hold the count of intolerable taints of a pod for a given node diff --git a/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go b/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go index b2dd48df72d..e2f77ea72d0 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go @@ -22,7 +22,6 @@ import ( "testing" "k8s.io/kubernetes/pkg/api" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -212,10 +211,7 @@ func TestTaintAndToleration(t *testing.T) { } for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap([]*api.Pod{{}}) - list, err := ComputeTaintTolerationPriority( - test.pod, - nodeNameToInfo, - algorithm.FakeNodeLister(test.nodes)) + list, err := ComputeTaintTolerationPriority(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("%s, unexpected error: %v", test.test, err) } diff --git a/plugin/pkg/scheduler/algorithm/types.go b/plugin/pkg/scheduler/algorithm/types.go index 14959ccaf68..d098bd4ff3a 100644 --- a/plugin/pkg/scheduler/algorithm/types.go +++ b/plugin/pkg/scheduler/algorithm/types.go @@ -26,7 +26,7 @@ import ( // The failure information is given by the error. 
type FitPredicate func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) -type PriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister NodeLister) (schedulerapi.HostPriorityList, error) +type PriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) type PriorityConfig struct { Function PriorityFunction diff --git a/plugin/pkg/scheduler/extender_test.go b/plugin/pkg/scheduler/extender_test.go index aa85f1b9f7e..a8afc605546 100644 --- a/plugin/pkg/scheduler/extender_test.go +++ b/plugin/pkg/scheduler/extender_test.go @@ -89,12 +89,7 @@ func machine2PrioritizerExtender(pod *api.Pod, nodes []*api.Node) (*schedulerapi return &result, nil } -func machine2Prioritizer(_ *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - return []schedulerapi.HostPriority{}, err - } - +func machine2Prioritizer(_ *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { result := []schedulerapi.HostPriority{} for _, node := range nodes { score := 1 diff --git a/plugin/pkg/scheduler/factory/factory_test.go b/plugin/pkg/scheduler/factory/factory_test.go index 24d0823d0d2..5d1dd5f529c 100644 --- a/plugin/pkg/scheduler/factory/factory_test.go +++ b/plugin/pkg/scheduler/factory/factory_test.go @@ -32,7 +32,6 @@ import ( "k8s.io/kubernetes/pkg/runtime" "k8s.io/kubernetes/pkg/types" utiltesting "k8s.io/kubernetes/pkg/util/testing" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" latestschedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api/latest" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" @@ -124,11 +123,11 @@ func PredicateTwo(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeI return true, nil } -func PriorityOne(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { +func PriorityOne(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { return []schedulerapi.HostPriority{}, nil } -func PriorityTwo(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { +func PriorityTwo(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { return []schedulerapi.HostPriority{}, nil } diff --git a/plugin/pkg/scheduler/generic_scheduler.go b/plugin/pkg/scheduler/generic_scheduler.go index 65f92c239e6..0b4e96b8898 100644 --- a/plugin/pkg/scheduler/generic_scheduler.go +++ b/plugin/pkg/scheduler/generic_scheduler.go @@ -93,7 +93,7 @@ func (g *genericScheduler) Schedule(pod *api.Pod, nodeLister algorithm.NodeListe } trace.Step("Computing predicates") - filteredNodes, failedPredicateMap, err := findNodesThatFit(pod, g.cachedNodeInfoMap, g.predicates, nodes, g.extenders) + filteredNodes, failedPredicateMap, err := findNodesThatFit(pod, g.cachedNodeInfoMap, nodes, g.predicates, g.extenders) if err != nil { return "", err } @@ -106,7 +106,7 @@ func (g *genericScheduler) Schedule(pod *api.Pod, nodeLister algorithm.NodeListe } trace.Step("Prioritizing") - priorityList, err := PrioritizeNodes(pod, 
g.cachedNodeInfoMap, g.prioritizers, algorithm.FakeNodeLister(filteredNodes), g.extenders) + priorityList, err := PrioritizeNodes(pod, g.cachedNodeInfoMap, g.prioritizers, filteredNodes, g.extenders) if err != nil { return "", err } @@ -136,7 +136,12 @@ func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList // Filters the nodes to find the ones that fit based on the given predicate functions // Each node is passed through the predicate functions to determine if it is a fit -func findNodesThatFit(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, predicateFuncs map[string]algorithm.FitPredicate, nodes []*api.Node, extenders []algorithm.SchedulerExtender) ([]*api.Node, FailedPredicateMap, error) { +func findNodesThatFit( + pod *api.Pod, + nodeNameToInfo map[string]*schedulercache.NodeInfo, + nodes []*api.Node, + predicateFuncs map[string]algorithm.FitPredicate, + extenders []algorithm.SchedulerExtender) ([]*api.Node, FailedPredicateMap, error) { // Create filtered list with enough space to avoid growing it. filtered := make([]*api.Node, 0, len(nodes)) failedPredicateMap := FailedPredicateMap{} @@ -229,7 +234,7 @@ func PrioritizeNodes( pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, priorityConfigs []algorithm.PriorityConfig, - nodeLister algorithm.NodeLister, + nodes []*api.Node, extenders []algorithm.SchedulerExtender, ) (schedulerapi.HostPriorityList, error) { result := make(schedulerapi.HostPriorityList, 0, len(nodeNameToInfo)) @@ -237,7 +242,7 @@ func PrioritizeNodes( // If no priority configs are provided, then the EqualPriority function is applied // This is required to generate the priority list in the required format if len(priorityConfigs) == 0 && len(extenders) == 0 { - return EqualPriority(pod, nodeNameToInfo, nodeLister) + return EqualPriority(pod, nodeNameToInfo, nodes) } var ( @@ -258,7 +263,7 @@ func PrioritizeNodes( defer wg.Done() weight := config.Weight priorityFunc := config.Function - prioritizedList, err := priorityFunc(pod, nodeNameToInfo, nodeLister) + prioritizedList, err := priorityFunc(pod, nodeNameToInfo, nodes) mu.Lock() defer mu.Unlock() @@ -279,11 +284,7 @@ func PrioritizeNodes( // wait for all go routines to finish wg.Wait() - if len(extenders) != 0 && nodeLister != nil { - nodes, err := nodeLister.List() - if err != nil { - return schedulerapi.HostPriorityList{}, err - } + if len(extenders) != 0 && nodes != nil { for _, extender := range extenders { wg.Add(1) go func(ext algorithm.SchedulerExtender) { @@ -313,13 +314,7 @@ func PrioritizeNodes( } // EqualPriority is a prioritizer function that gives an equal weight of one to all nodes -func EqualPriority(_ *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() - if err != nil { - glog.Errorf("Failed to list nodes: %v", err) - return []schedulerapi.HostPriority{}, err - } - +func EqualPriority(_ *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { result := make(schedulerapi.HostPriorityList, len(nodes)) for _, node := range nodes { result = append(result, schedulerapi.HostPriority{ diff --git a/plugin/pkg/scheduler/generic_scheduler_test.go b/plugin/pkg/scheduler/generic_scheduler_test.go index 7bad27ea0f3..72d2d00137e 100644 --- a/plugin/pkg/scheduler/generic_scheduler_test.go +++ b/plugin/pkg/scheduler/generic_scheduler_test.go @@ -59,13 +59,8 @@ func 
hasNoPodsPredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache return false, algorithmpredicates.ErrFakePredicate } -func numericPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { - nodes, err := nodeLister.List() +func numericPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { result := []schedulerapi.HostPriority{} - - if err != nil { - return nil, fmt.Errorf("failed to list nodes: %v", err) - } for _, node := range nodes { score, err := strconv.Atoi(node.Name) if err != nil { @@ -79,11 +74,11 @@ func numericPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.Nod return result, nil } -func reverseNumericPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) { +func reverseNumericPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { var maxScore float64 minScore := math.MaxFloat64 reverseResult := []schedulerapi.HostPriority{} - result, err := numericPriority(pod, nodeNameToInfo, nodeLister) + result, err := numericPriority(pod, nodeNameToInfo, nodes) if err != nil { return nil, err } @@ -321,7 +316,7 @@ func TestFindFitAllError(t *testing.T) { "2": schedulercache.NewNodeInfo(), "1": schedulercache.NewNodeInfo(), } - _, predicateMap, err := findNodesThatFit(&api.Pod{}, nodeNameToInfo, predicates, makeNodeList(nodes), nil) + _, predicateMap, err := findNodesThatFit(&api.Pod{}, nodeNameToInfo, makeNodeList(nodes), predicates, nil) if err != nil { t.Errorf("unexpected error: %v", err) @@ -355,7 +350,7 @@ func TestFindFitSomeError(t *testing.T) { nodeNameToInfo[name].SetNode(&api.Node{ObjectMeta: api.ObjectMeta{Name: name}}) } - _, predicateMap, err := findNodesThatFit(pod, nodeNameToInfo, predicates, makeNodeList(nodes), nil) + _, predicateMap, err := findNodesThatFit(pod, nodeNameToInfo, makeNodeList(nodes), predicates, nil) if err != nil && !reflect.DeepEqual(err, algorithmpredicates.ErrFakePredicate) { t.Errorf("unexpected error: %v", err) } From fc6d38baa2a0739f43af1d4284bfd31c430dc44c Mon Sep 17 00:00:00 2001 From: Wojciech Tyczynski Date: Wed, 20 Jul 2016 08:28:57 +0200 Subject: [PATCH 2/2] Avoid locking when computing predicates. --- plugin/pkg/scheduler/generic_scheduler.go | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/plugin/pkg/scheduler/generic_scheduler.go b/plugin/pkg/scheduler/generic_scheduler.go index 0b4e96b8898..48d05c7d937 100644 --- a/plugin/pkg/scheduler/generic_scheduler.go +++ b/plugin/pkg/scheduler/generic_scheduler.go @@ -21,6 +21,7 @@ import ( "fmt" "sort" "sync" + "sync/atomic" "time" "github.com/golang/glog" @@ -142,33 +143,39 @@ func findNodesThatFit( nodes []*api.Node, predicateFuncs map[string]algorithm.FitPredicate, extenders []algorithm.SchedulerExtender) ([]*api.Node, FailedPredicateMap, error) { - // Create filtered list with enough space to avoid growing it. - filtered := make([]*api.Node, 0, len(nodes)) + var filtered []*api.Node failedPredicateMap := FailedPredicateMap{} if len(predicateFuncs) == 0 { filtered = nodes } else { - predicateResultLock := sync.Mutex{} - errs := []error{} + // Create filtered list with enough space to avoid growing it + // and allow assigning. 
+ filtered = make([]*api.Node, len(nodes)) meta := predicates.PredicateMetadata(pod) + errs := []error{} + + var predicateResultLock sync.Mutex + var filteredLen int32 checkNode := func(i int) { nodeName := nodes[i].Name fits, failedPredicate, err := podFitsOnNode(pod, meta, nodeNameToInfo[nodeName], predicateFuncs) - - predicateResultLock.Lock() - defer predicateResultLock.Unlock() if err != nil { + predicateResultLock.Lock() errs = append(errs, err) + predicateResultLock.Unlock() return } if fits { - filtered = append(filtered, nodes[i]) + filtered[atomic.AddInt32(&filteredLen, 1)-1] = nodes[i] } else { + predicateResultLock.Lock() failedPredicateMap[nodeName] = failedPredicate + predicateResultLock.Unlock() } } workqueue.Parallelize(16, len(nodes), checkNode) + filtered = filtered[:filteredLen] if len(errs) > 0 { return []*api.Node{}, FailedPredicateMap{}, errors.NewAggregate(errs) }
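
Reading aid (not part of the patch): the second commit replaces the mutex around `filtered = append(filtered, ...)` with a preallocated result slice plus an atomic counter, so parallel predicate workers never contend on a lock in the common "node fits" path. Below is a minimal, self-contained Go sketch of that collection pattern under illustrative names (`parallelFilter`, plain strings instead of `*api.Node`, a goroutine per item instead of `workqueue.Parallelize(16, ...)`); it is a sketch of the technique, not the scheduler code itself.

```go
// Lock-free result collection: each worker that finds a match claims a
// private slot in a preallocated slice via an atomic counter, and the
// slice is truncated to the counter once all workers finish.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func parallelFilter(items []string, keep func(string) bool) []string {
	// Preallocate to len(items) so workers can assign by index without
	// growing the slice; no mutex is needed on this path.
	filtered := make([]string, len(items))
	var filteredLen int32

	var wg sync.WaitGroup
	for i := range items {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if keep(items[i]) {
				// AddInt32 returns the incremented count; subtracting one
				// yields this worker's unique slot in the slice.
				filtered[atomic.AddInt32(&filteredLen, 1)-1] = items[i]
			}
		}(i)
	}
	wg.Wait()

	// Trim the unused tail. Result order depends on scheduling, which is
	// acceptable here, as it is in the patch.
	return filtered[:filteredLen]
}

func main() {
	nodes := []string{"node1", "node2", "node3", "node4"}
	fmt.Println(parallelFilter(nodes, func(n string) bool { return n != "node3" }))
}
```

Note the trade-off visible in the patch itself: only the hot path (a fitting node) becomes lock-free; appends to `errs` and writes to `failedPredicateMap` still take `predicateResultLock`, since those are expected to be rare and a map cannot be written concurrently without synchronization.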