Merge pull request #89463 from fxmumu/feature-scheduler-err-handler

Remove nested if statement and test it
This commit is contained in:
Kubernetes Prow Robot 2020-03-26 11:14:17 -07:00 committed by GitHub
commit 57144b38eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 21 deletions

View File

@ -61,6 +61,7 @@ go_test(
embed = [":go_default_library"], embed = [":go_default_library"],
deps = [ deps = [
"//pkg/api/testing:go_default_library", "//pkg/api/testing:go_default_library",
"//pkg/apis/core:go_default_library",
"//pkg/controller/volume/scheduling:go_default_library", "//pkg/controller/volume/scheduling:go_default_library",
"//pkg/features:go_default_library", "//pkg/features:go_default_library",
"//pkg/scheduler/apis/config:go_default_library", "//pkg/scheduler/apis/config:go_default_library",
@ -85,6 +86,7 @@ go_test(
"//pkg/scheduler/testing:go_default_library", "//pkg/scheduler/testing:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/events/v1beta1:go_default_library", "//staging/src/k8s.io/api/events/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",

View File

@ -461,26 +461,24 @@ func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.Sch
pod := podInfo.Pod pod := podInfo.Pod
if err == core.ErrNoNodesAvailable { if err == core.ErrNoNodesAvailable {
klog.V(2).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name) klog.V(2).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
} else { } else if _, ok := err.(*core.FitError); ok {
if _, ok := err.(*core.FitError); ok { klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err) } else if apierrors.IsNotFound(err) {
} else if apierrors.IsNotFound(err) { klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err)
klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err) if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" { nodeName := errStatus.Status().Details.Name
nodeName := errStatus.Status().Details.Name // when node is not found, We do not remove the node right away. Trying again to get
// when node is not found, We do not remove the node right away. Trying again to get // the node and if the node is still not found, then remove it from the scheduler cache.
// the node and if the node is still not found, then remove it from the scheduler cache. _, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
_, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) if err != nil && apierrors.IsNotFound(err) {
if err != nil && apierrors.IsNotFound(err) { node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} if err := schedulerCache.RemoveNode(&node); err != nil {
if err := schedulerCache.RemoveNode(&node); err != nil { klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name)
klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name)
}
} }
} }
} else {
klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
} }
} else {
klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
} }
podSchedulingCycle := podQueue.SchedulingCycle() podSchedulingCycle := podQueue.SchedulingCycle()

View File

@ -20,6 +20,8 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"errors" "errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
apicore "k8s.io/kubernetes/pkg/apis/core"
"reflect" "reflect"
"strings" "strings"
"testing" "testing"
@ -318,8 +320,13 @@ func TestDefaultErrorFunc(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "bar"}, ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "bar"},
Spec: apitesting.V1DeepEqualSafePodSpec(), Spec: apitesting.V1DeepEqualSafePodSpec(),
} }
nodeBar, nodeFoo :=
&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "bar"}},
&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
testPodInfo := &framework.PodInfo{Pod: testPod} testPodInfo := &framework.PodInfo{Pod: testPod}
client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}) client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{*nodeBar}})
stopCh := make(chan struct{}) stopCh := make(chan struct{})
defer close(stopCh) defer close(stopCh)
@ -328,8 +335,18 @@ func TestDefaultErrorFunc(t *testing.T) {
schedulerCache := internalcache.New(30*time.Second, stopCh) schedulerCache := internalcache.New(30*time.Second, stopCh)
errFunc := MakeDefaultErrorFunc(client, queue, schedulerCache) errFunc := MakeDefaultErrorFunc(client, queue, schedulerCache)
// Trigger error handling again to put the pod in unschedulable queue _ = schedulerCache.AddNode(nodeFoo)
errFunc(testPodInfo, nil)
// assume nodeFoo was not found
err := apierrors.NewNotFound(apicore.Resource("node"), nodeFoo.Name)
errFunc(testPodInfo, err)
dump := schedulerCache.Dump()
for _, n := range dump.Nodes {
if e, a := nodeFoo, n.Node(); reflect.DeepEqual(e, a) {
t.Errorf("Node %s is still in schedulerCache", e.Name)
break
}
}
// Try up to a minute to retrieve the error pod from priority queue // Try up to a minute to retrieve the error pod from priority queue
foundPodFlag := false foundPodFlag := false
@ -355,6 +372,35 @@ func TestDefaultErrorFunc(t *testing.T) {
t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod) t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod)
} }
_ = queue.Delete(testPod)
// Trigger error handling again to put the pod in unschedulable queue
errFunc(testPodInfo, nil)
// Try up to a minute to retrieve the error pod from priority queue
foundPodFlag = false
maxIterations = 10 * 60
for i := 0; i < maxIterations; i++ {
time.Sleep(100 * time.Millisecond)
got := getPodfromPriorityQueue(queue, testPod)
if got == nil {
continue
}
testClientGetPodRequest(client, t, testPod.Namespace, testPod.Name)
if e, a := testPod, got; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %v, got %v", e, a)
}
foundPodFlag = true
break
}
if !foundPodFlag {
t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod)
}
// Remove the pod from priority queue to test putting error // Remove the pod from priority queue to test putting error
// pod in backoff queue. // pod in backoff queue.
queue.Delete(testPod) queue.Delete(testPod)
@ -423,7 +469,7 @@ func testClientGetPodRequest(client *fake.Clientset, t *testing.T, podNs string,
requestReceived := false requestReceived := false
actions := client.Actions() actions := client.Actions()
for _, a := range actions { for _, a := range actions {
if a.GetVerb() == "get" { if a.GetVerb() == "get" && a.GetResource().Resource == "pods" {
getAction, ok := a.(clienttesting.GetAction) getAction, ok := a.(clienttesting.GetAction)
if !ok { if !ok {
t.Errorf("Can't cast action object to GetAction interface") t.Errorf("Can't cast action object to GetAction interface")