Merge pull request #47003 from aveshagarwal/master-scheduler-aggregate-errors-issue

Automatic merge from submit-queue (batch tested with PRs 45877, 46846, 46630, 46087, 47003)

Remove duplicate errors from an aggregate error input.

This PR, in general, removes duplicate errors from an aggregate error input, and returns unique errors with their occurrence count. Specifically,  this PR helps with some scheduler errors that fill the log enormously. For example, see the following `truncated` output from a 300-plus nodes cluster, as there was a same error from almost all nodes.


[SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected., SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found.........

After this PR, the output looks like (on a 2-node cluster):

SchedulerPredicates failed due to persistentvolumeclaims "mongodb" not found, which is unexpected.(Count=2)

@derekwaynecarr @smarterclayton @kubernetes/sig-scheduling-pr-reviews 

Fixes https://github.com/kubernetes/kubernetes/issues/47145
This commit is contained in:
Kubernetes Submit Queue 2017-06-07 17:55:52 -07:00 committed by GitHub
commit 914f5d659e
3 changed files with 59 additions and 3 deletions

View File

@ -178,7 +178,7 @@ func findNodesThatFit(
// Create filtered list with enough space to avoid growing it
// and allow assigning.
filtered = make([]*v1.Node, len(nodes))
errs := []error{}
errs := errors.MessageCountMap{}
var predicateResultLock sync.Mutex
var filteredLen int32
@ -189,7 +189,7 @@ func findNodesThatFit(
fits, failedPredicates, err := podFitsOnNode(pod, meta, nodeNameToInfo[nodeName], predicateFuncs, ecache)
if err != nil {
predicateResultLock.Lock()
errs = append(errs, err)
errs[err.Error()]++
predicateResultLock.Unlock()
return
}
@ -204,7 +204,7 @@ func findNodesThatFit(
workqueue.Parallelize(16, len(nodes), checkNode)
filtered = filtered[:filteredLen]
if len(errs) > 0 {
return []*v1.Node{}, FailedPredicateMap{}, errors.NewAggregate(errs)
return []*v1.Node{}, FailedPredicateMap{}, errors.CreateAggregateFromMessageCountMap(errs)
}
}

View File

@ -21,6 +21,9 @@ import (
"fmt"
)
// MessagesgCountMap contains occurance for each error message.
type MessageCountMap map[string]int
// Aggregate represents an object that contains multiple errors, but does not
// necessarily have singular semantic meaning.
type Aggregate interface {
@ -147,6 +150,22 @@ func Flatten(agg Aggregate) Aggregate {
return NewAggregate(result)
}
// CreateAggregateFromMessageCountMap converts MessageCountMap Aggregate
func CreateAggregateFromMessageCountMap(m MessageCountMap) Aggregate {
if m == nil {
return nil
}
result := make([]error, 0, len(m))
for errStr, count := range m {
var countStr string
if count > 1 {
countStr = fmt.Sprintf(" (repeated %v times)", count)
}
result = append(result, fmt.Errorf("%v%v", errStr, countStr))
}
return NewAggregate(result)
}
// Reduce will return err or, if err is an Aggregate and only has one item,
// the first item in the aggregate.
func Reduce(err error) error {

View File

@ -19,6 +19,7 @@ package errors
import (
"fmt"
"reflect"
"sort"
"testing"
)
@ -262,6 +263,42 @@ func TestFlatten(t *testing.T) {
}
}
func TestCreateAggregateFromMessageCountMap(t *testing.T) {
testCases := []struct {
name string
mcp MessageCountMap
expected Aggregate
}{
{
"input has single instance of one message",
MessageCountMap{"abc": 1},
aggregate{fmt.Errorf("abc")},
},
{
"input has multiple messages",
MessageCountMap{"abc": 2, "ghi": 1},
aggregate{fmt.Errorf("abc (repeated 2 times)"), fmt.Errorf("ghi")},
},
}
var expected, agg []error
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
if testCase.expected != nil {
expected = testCase.expected.Errors()
sort.Slice(expected, func(i, j int) bool { return expected[i].Error() < expected[j].Error() })
}
if testCase.mcp != nil {
agg = CreateAggregateFromMessageCountMap(testCase.mcp).Errors()
sort.Slice(agg, func(i, j int) bool { return agg[i].Error() < agg[j].Error() })
}
if !reflect.DeepEqual(expected, agg) {
t.Errorf("expected %v, got %v", expected, agg)
}
})
}
}
func TestAggregateGoroutines(t *testing.T) {
testCases := []struct {
errs []error