Merge pull request #4765 from brendandburns/scheduler

Log a better error with useful info on scheduling failures.
This commit is contained in:
Rohit Jnagal 2015-02-26 12:01:04 -08:00
commit e455ee5d2e
4 changed files with 101 additions and 23 deletions

View File

@ -20,13 +20,31 @@ import (
"fmt"
"math/rand"
"sort"
"strings"
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
type FailedPredicateMap map[string]util.StringSet
type FitError struct {
Pod api.Pod
FailedPredicates FailedPredicateMap
}
// implementation of the error interface
func (f *FitError) Error() string {
output := fmt.Sprintf("failed to find fit for pod: %v", f.Pod)
for node, predicateList := range f.FailedPredicates {
output = output + fmt.Sprintf("Node %s: %s", node, strings.Join(predicateList.List(), ","))
}
return output
}
type genericScheduler struct {
predicates []FitPredicate
predicates map[string]FitPredicate
prioritizers []PriorityConfig
pods PodLister
random *rand.Rand
@ -42,7 +60,7 @@ func (g *genericScheduler) Schedule(pod api.Pod, minionLister MinionLister) (str
return "", fmt.Errorf("no minions available to schedule pods")
}
filteredNodes, err := findNodesThatFit(pod, g.pods, g.predicates, minions)
filteredNodes, failedPredicateMap, err := findNodesThatFit(pod, g.pods, g.predicates, minions)
if err != nil {
return "", err
}
@ -52,7 +70,10 @@ func (g *genericScheduler) Schedule(pod api.Pod, minionLister MinionLister) (str
return "", err
}
if len(priorityList) == 0 {
return "", fmt.Errorf("failed to find a fit for pod: %v", pod)
return "", &FitError{
Pod: pod,
FailedPredicates: failedPredicateMap,
}
}
return g.selectHost(priorityList)
@ -76,21 +97,26 @@ func (g *genericScheduler) selectHost(priorityList HostPriorityList) (string, er
// Filters the minions to find the ones that fit based on the given predicate functions
// Each minion is passed through the predicate functions to determine if it is a fit
func findNodesThatFit(pod api.Pod, podLister PodLister, predicates []FitPredicate, nodes api.NodeList) (api.NodeList, error) {
func findNodesThatFit(pod api.Pod, podLister PodLister, predicates map[string]FitPredicate, nodes api.NodeList) (api.NodeList, FailedPredicateMap, error) {
filtered := []api.Node{}
machineToPods, err := MapPodsToMachines(podLister)
failedPredicateMap := FailedPredicateMap{}
if err != nil {
return api.NodeList{}, err
return api.NodeList{}, FailedPredicateMap{}, err
}
for _, node := range nodes.Items {
fits := true
for _, predicate := range predicates {
for name, predicate := range predicates {
fit, err := predicate(pod, machineToPods[node.Name], node.Name)
if err != nil {
return api.NodeList{}, err
return api.NodeList{}, FailedPredicateMap{}, err
}
if !fit {
fits = false
if _, found := failedPredicateMap[node.Name]; !found {
failedPredicateMap[node.Name] = util.StringSet{}
}
failedPredicateMap[node.Name].Insert(name)
break
}
}
@ -98,7 +124,7 @@ func findNodesThatFit(pod api.Pod, podLister PodLister, predicates []FitPredicat
filtered = append(filtered, node)
}
}
return api.NodeList{Items: filtered}, nil
return api.NodeList{Items: filtered}, failedPredicateMap, nil
}
// Prioritizes the minions by running the individual priority functions sequentially.
@ -161,7 +187,7 @@ func EqualPriority(pod api.Pod, podLister PodLister, minionLister MinionLister)
return result, nil
}
func NewGenericScheduler(predicates []FitPredicate, prioritizers []PriorityConfig, pods PodLister, random *rand.Rand) Scheduler {
func NewGenericScheduler(predicates map[string]FitPredicate, prioritizers []PriorityConfig, pods PodLister, random *rand.Rand) Scheduler {
return &genericScheduler{
predicates: predicates,
prioritizers: prioritizers,

View File

@ -83,7 +83,7 @@ func reverseNumericPriority(pod api.Pod, podLister PodLister, minionLister Minio
return reverseResult, nil
}
func makeMinionList(nodeNames []string) api.NodeList {
func makeNodeList(nodeNames []string) api.NodeList {
result := api.NodeList{
Items: make([]api.Node, len(nodeNames)),
}
@ -162,7 +162,7 @@ func TestSelectHost(t *testing.T) {
func TestGenericScheduler(t *testing.T) {
tests := []struct {
name string
predicates []FitPredicate
predicates map[string]FitPredicate
prioritizers []PriorityConfig
nodes []string
pod api.Pod
@ -170,14 +170,14 @@ func TestGenericScheduler(t *testing.T) {
expectsErr bool
}{
{
predicates: []FitPredicate{falsePredicate},
predicates: map[string]FitPredicate{"false": falsePredicate},
prioritizers: []PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 1",
},
{
predicates: []FitPredicate{truePredicate},
predicates: map[string]FitPredicate{"true": truePredicate},
prioritizers: []PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
// Random choice between both, the rand seeded above with zero, chooses "machine1"
@ -186,7 +186,7 @@ func TestGenericScheduler(t *testing.T) {
},
{
// Fits on a machine where the pod ID matches the machine name
predicates: []FitPredicate{matchesPredicate},
predicates: map[string]FitPredicate{"matches": matchesPredicate},
prioritizers: []PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
pod: api.Pod{ObjectMeta: api.ObjectMeta{Name: "machine2"}},
@ -194,14 +194,14 @@ func TestGenericScheduler(t *testing.T) {
name: "test 3",
},
{
predicates: []FitPredicate{truePredicate},
predicates: map[string]FitPredicate{"true": truePredicate},
prioritizers: []PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
expectedHost: "3",
name: "test 4",
},
{
predicates: []FitPredicate{matchesPredicate},
predicates: map[string]FitPredicate{"matches": matchesPredicate},
prioritizers: []PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
pod: api.Pod{ObjectMeta: api.ObjectMeta{Name: "2"}},
@ -209,7 +209,7 @@ func TestGenericScheduler(t *testing.T) {
name: "test 5",
},
{
predicates: []FitPredicate{truePredicate},
predicates: map[string]FitPredicate{"true": truePredicate},
prioritizers: []PriorityConfig{{Function: numericPriority, Weight: 1}, {Function: reverseNumericPriority, Weight: 2}},
nodes: []string{"3", "2", "1"},
pod: api.Pod{ObjectMeta: api.ObjectMeta{Name: "2"}},
@ -217,7 +217,7 @@ func TestGenericScheduler(t *testing.T) {
name: "test 6",
},
{
predicates: []FitPredicate{truePredicate, falsePredicate},
predicates: map[string]FitPredicate{"true": truePredicate, "false": falsePredicate},
prioritizers: []PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
expectsErr: true,
@ -228,7 +228,7 @@ func TestGenericScheduler(t *testing.T) {
for _, test := range tests {
random := rand.New(rand.NewSource(0))
scheduler := NewGenericScheduler(test.predicates, test.prioritizers, FakePodLister([]api.Pod{}), random)
machine, err := scheduler.Schedule(test.pod, FakeMinionLister(makeMinionList(test.nodes)))
machine, err := scheduler.Schedule(test.pod, FakeMinionLister(makeNodeList(test.nodes)))
if test.expectsErr {
if err == nil {
t.Error("Unexpected non-error")
@ -243,3 +243,55 @@ func TestGenericScheduler(t *testing.T) {
}
}
}
func TestFindFitAllError(t *testing.T) {
nodes := []string{"3", "2", "1"}
predicates := map[string]FitPredicate{"true": truePredicate, "false": falsePredicate}
_, predicateMap, err := findNodesThatFit(api.Pod{}, FakePodLister([]api.Pod{}), predicates, makeNodeList(nodes))
if err != nil {
t.Errorf("unexpected error: %v")
}
if len(predicateMap) != len(nodes) {
t.Errorf("unexpected failed predicate map: %v", predicateMap)
}
for _, node := range nodes {
failures, found := predicateMap[node]
if !found {
t.Errorf("failed to find node: %s in %v", node, predicateMap)
}
if len(failures) != 1 || !failures.Has("false") {
t.Errorf("unexpected failures: %v", failures)
}
}
}
func TestFindFitSomeError(t *testing.T) {
nodes := []string{"3", "2", "1"}
predicates := map[string]FitPredicate{"true": truePredicate, "match": matchesPredicate}
pod := api.Pod{ObjectMeta: api.ObjectMeta{Name: "1"}}
_, predicateMap, err := findNodesThatFit(pod, FakePodLister([]api.Pod{}), predicates, makeNodeList(nodes))
if err != nil {
t.Errorf("unexpected error: %v")
}
if len(predicateMap) != (len(nodes) - 1) {
t.Errorf("unexpected failed predicate map: %v", predicateMap)
}
for _, node := range nodes {
if node == pod.Name {
continue
}
failures, found := predicateMap[node]
if !found {
t.Errorf("failed to find node: %s in %v", node, predicateMap)
}
if len(failures) != 1 || !failures.Has("match") {
t.Errorf("unexpected failures: %v", failures)
}
}
}

View File

@ -119,7 +119,7 @@ func TestServiceSpreadPriority(t *testing.T) {
for _, test := range tests {
serviceSpread := ServiceSpread{serviceLister: FakeServiceLister(test.services)}
list, err := serviceSpread.CalculateSpreadPriority(test.pod, FakePodLister(test.pods), FakeMinionLister(makeMinionList(test.nodes)))
list, err := serviceSpread.CalculateSpreadPriority(test.pod, FakePodLister(test.pods), FakeMinionLister(makeNodeList(test.nodes)))
if err != nil {
t.Errorf("unexpected error: %v", err)
}

View File

@ -120,17 +120,17 @@ func GetAlgorithmProvider(name string) (*AlgorithmProviderConfig, error) {
return &provider, nil
}
func getFitPredicateFunctions(names util.StringSet) ([]algorithm.FitPredicate, error) {
func getFitPredicateFunctions(names util.StringSet) (map[string]algorithm.FitPredicate, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
predicates := []algorithm.FitPredicate{}
predicates := map[string]algorithm.FitPredicate{}
for _, name := range names.List() {
function, ok := fitPredicateMap[name]
if !ok {
return nil, fmt.Errorf("Invalid predicate name %q specified - no corresponding function found", name)
}
predicates = append(predicates, function)
predicates[name] = function
}
return predicates, nil
}