Merge pull request #39491 from jayunit100/sched_Histogram_error

Automatic merge from submit-queue (batch tested with PRs 34488, 39511, 39619, 38342, 39491)

Surface the FitError message in the pod's condition via the PodConditionUpdater.

Fixes #20064. After a roundabout volley of ideas, we ended up using the existing pod Conditions for this rather than a first-class API object. This is just a quick sketch of the minimal skeleton implementation; it should pretty much "just work". I'll test it more later today.
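To make the intent concrete, here is a small standalone sketch (the node names, reason strings, and the surrounding `main` function are made up for illustration and are not the scheduler's real types): the predicate failure reasons reported for each node are folded into a histogram and rendered as a single human-readable string, which is what gets stored as the message of the pod's `PodScheduled` condition.

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

func main() {
	// Hypothetical per-node predicate failure reasons; names are made up for illustration.
	failedPredicates := map[string][]string{
		"node-1": {"NodeUnderMemoryPressure"},
		"node-2": {"NodeUnderDiskPressure"},
		"node-3": {"NodeUnderDiskPressure"},
	}

	// Fold the per-node reasons into a histogram: reason -> number of nodes it failed on.
	reasons := map[string]int{}
	for _, nodeReasons := range failedPredicates {
		for _, reason := range nodeReasons {
			reasons[reason]++
		}
	}

	// Render "Reason (count)" entries in a stable, sorted order.
	entries := make([]string, 0, len(reasons))
	for reason, count := range reasons {
		entries = append(entries, fmt.Sprintf("%v (%v)", reason, count))
	}
	sort.Strings(entries)

	// This single string is what ends up as the PodScheduled condition message.
	msg := "No nodes are available that match all of the following predicates: " +
		strings.Join(entries, ", ") + "."
	fmt.Println(msg)
	// Prints something like:
	// No nodes are available that match all of the following predicates: NodeUnderDiskPressure (2), NodeUnderMemoryPressure (1).
}
```

The real aggregation lives in `FitError.Error()` in the first diff below, and the resulting string is attached to the condition in `scheduleOne()` in the last diff.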


Release Note:
```
Histogram data of predicate failures is included in pod conditions and is therefore visible to users via kubectl commands.
```
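For context, a rough illustration of where that message surfaces (the reasons, counts, and exact wording below are hypothetical): the string becomes the `message` of the pod's `PodScheduled` condition, which can be inspected with, for example, `kubectl get pod <pod-name> -o yaml`:
```
status:
  conditions:
  - type: PodScheduled
    status: "False"
    reason: Unschedulable
    message: 'No nodes are available that match all of the following predicates: NodeUnderDiskPressure (2), NodeUnderMemoryPressure (1).'
```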
Kubernetes Submit Queue 2017-01-10 16:07:17 -08:00 committed by GitHub
commit add3a08a6d
3 changed files with 26 additions and 10 deletions


@@ -17,7 +17,6 @@ limitations under the License.
package scheduler
import (
"bytes"
"fmt"
"sort"
"strings"
@@ -45,10 +44,10 @@ type FitError struct {
var ErrNoNodesAvailable = fmt.Errorf("no nodes available to schedule pods")
+ const NoNodeAvailableMsg = "No nodes are available that match all of the following predicates:"
// Error returns detailed information of why the pod failed to fit on each node
func (f *FitError) Error() string {
- var buf bytes.Buffer
- buf.WriteString(fmt.Sprintf("pod (%s) failed to fit in any node\n", f.Pod.Name))
reasons := make(map[string]int)
for _, predicates := range f.FailedPredicates {
for _, pred := range predicates {
@@ -64,10 +63,8 @@ func (f *FitError) Error() string {
sort.Strings(reasonStrings)
return reasonStrings
}
reasonMsg := fmt.Sprintf("fit failure summary on nodes : %v", strings.Join(sortReasonsHistogram(), ", "))
buf.WriteString(reasonMsg)
return buf.String()
reasonMsg := fmt.Sprintf(NoNodeAvailableMsg+": %v.", strings.Join(sortReasonsHistogram(), ", "))
return reasonMsg
}
type genericScheduler struct {


@@ -21,6 +21,7 @@ import (
"math"
"reflect"
"strconv"
"strings"
"testing"
"time"
@@ -397,6 +398,23 @@ func makeNode(node string, milliCPU, memory int64) *v1.Node {
}
}
+ func TestHumanReadableFitError(t *testing.T) {
+ error := &FitError{
+ Pod: &v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "2"}},
+ FailedPredicates: FailedPredicateMap{
+ "1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderMemoryPressure},
+ "2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderDiskPressure},
+ "3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderDiskPressure},
+ },
+ }
+ if strings.Contains(error.Error(), "No nodes are available that match all of the following predicates") {
+ if strings.Contains(error.Error(), "NodeUnderDiskPressure (2)") && strings.Contains(error.Error(), "NodeUnderMemoryPressure (1)") {
+ return
+ }
+ }
+ t.Errorf("Error message doesn't have all the information content: [" + error.Error() + "]")
+ }
// The point of this test is to show that you:
// - get the same priority for a zero-request pod as for a pod with the defaults requests,
// both when the zero-request pod is already on the machine and when the zero-request pod


@@ -98,9 +98,10 @@ func (s *Scheduler) scheduleOne() {
s.config.Error(pod, err)
s.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "%v", err)
s.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
- Type: v1.PodScheduled,
- Status: v1.ConditionFalse,
- Reason: v1.PodReasonUnschedulable,
+ Type: v1.PodScheduled,
+ Status: v1.ConditionFalse,
+ Reason: v1.PodReasonUnschedulable,
+ Message: err.Error(),
})
return
}