Merge pull request #89463 from fxmumu/feature-scheduler-err-handler

Remove nested if statement and test it
2025-07-23 03:41:45 +00:00 · 2020-03-26 11:14:17 -07:00 · 2020-03-26 11:14:17 -07:00 · 57144b38eb
commit 57144b38eb
parent 4488fd4749 8beb2439a0
3 changed files with 67 additions and 21 deletions
--- a/pkg/scheduler/BUILD
+++ b/pkg/scheduler/BUILD
@ -61,6 +61,7 @@ go_test(
    embed = [":go_default_library"],
    deps = [
        "//pkg/api/testing:go_default_library",
+        "//pkg/apis/core:go_default_library",
        "//pkg/controller/volume/scheduling:go_default_library",
        "//pkg/features:go_default_library",
        "//pkg/scheduler/apis/config:go_default_library",
@ -85,6 +86,7 @@ go_test(
        "//pkg/scheduler/testing:go_default_library",
        "//staging/src/k8s.io/api/core/v1:go_default_library",
        "//staging/src/k8s.io/api/events/v1beta1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
--- a/pkg/scheduler/factory.go
+++ b/pkg/scheduler/factory.go
@ -461,26 +461,24 @@ func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.Sch
 		pod := podInfo.Pod
 		if err == core.ErrNoNodesAvailable {
 			klog.V(2).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
-		} else {
-			if _, ok := err.(*core.FitError); ok {
-				klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
-			} else if apierrors.IsNotFound(err) {
-				klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err)
-				if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
-					nodeName := errStatus.Status().Details.Name
-					// when node is not found, We do not remove the node right away. Trying again to get
-					// the node and if the node is still not found, then remove it from the scheduler cache.
-					_, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
-					if err != nil && apierrors.IsNotFound(err) {
-						node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
-						if err := schedulerCache.RemoveNode(&node); err != nil {
-							klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name)
-						}
+		} else if _, ok := err.(*core.FitError); ok {
+			klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
+		} else if apierrors.IsNotFound(err) {
+			klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err)
+			if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
+				nodeName := errStatus.Status().Details.Name
+				// when node is not found, We do not remove the node right away. Trying again to get
+				// the node and if the node is still not found, then remove it from the scheduler cache.
+				_, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+				if err != nil && apierrors.IsNotFound(err) {
+					node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
+					if err := schedulerCache.RemoveNode(&node); err != nil {
+						klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name)
 					}
 				}
-			} else {
-				klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
 			}
+		} else {
+			klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
 		}

 		podSchedulingCycle := podQueue.SchedulingCycle()
--- a/pkg/scheduler/factory_test.go
+++ b/pkg/scheduler/factory_test.go
@ -20,6 +20,8 @@ import (
 	"context"
 	"encoding/json"
 	"errors"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	apicore "k8s.io/kubernetes/pkg/apis/core"
 	"reflect"
 	"strings"
 	"testing"
@ -318,8 +320,13 @@ func TestDefaultErrorFunc(t *testing.T) {
 		ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "bar"},
 		Spec:       apitesting.V1DeepEqualSafePodSpec(),
 	}
+
+	nodeBar, nodeFoo :=
+		&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "bar"}},
+		&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+
 	testPodInfo := &framework.PodInfo{Pod: testPod}
-	client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}})
+	client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{*nodeBar}})
 	stopCh := make(chan struct{})
 	defer close(stopCh)

@ -328,8 +335,18 @@ func TestDefaultErrorFunc(t *testing.T) {
 	schedulerCache := internalcache.New(30*time.Second, stopCh)
 	errFunc := MakeDefaultErrorFunc(client, queue, schedulerCache)

-	// Trigger error handling again to put the pod in unschedulable queue
-	errFunc(testPodInfo, nil)
+	_ = schedulerCache.AddNode(nodeFoo)
+
+	// assume nodeFoo was not found
+	err := apierrors.NewNotFound(apicore.Resource("node"), nodeFoo.Name)
+	errFunc(testPodInfo, err)
+	dump := schedulerCache.Dump()
+	for _, n := range dump.Nodes {
+		if e, a := nodeFoo, n.Node(); reflect.DeepEqual(e, a) {
+			t.Errorf("Node %s is still in schedulerCache", e.Name)
+			break
+		}
+	}

 	// Try up to a minute to retrieve the error pod from priority queue
 	foundPodFlag := false
@ -355,6 +372,35 @@ func TestDefaultErrorFunc(t *testing.T) {
 		t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod)
 	}

+	_ = queue.Delete(testPod)
+
+	// Trigger error handling again to put the pod in unschedulable queue
+	errFunc(testPodInfo, nil)
+
+	// Try up to a minute to retrieve the error pod from priority queue
+	foundPodFlag = false
+	maxIterations = 10 * 60
+	for i := 0; i < maxIterations; i++ {
+		time.Sleep(100 * time.Millisecond)
+		got := getPodfromPriorityQueue(queue, testPod)
+		if got == nil {
+			continue
+		}
+
+		testClientGetPodRequest(client, t, testPod.Namespace, testPod.Name)
+
+		if e, a := testPod, got; !reflect.DeepEqual(e, a) {
+			t.Errorf("Expected %v, got %v", e, a)
+		}
+
+		foundPodFlag = true
+		break
+	}
+
+	if !foundPodFlag {
+		t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod)
+	}
+
 	// Remove the pod from priority queue to test putting error
 	// pod in backoff queue.
 	queue.Delete(testPod)
@ -423,7 +469,7 @@ func testClientGetPodRequest(client *fake.Clientset, t *testing.T, podNs string,
 	requestReceived := false
 	actions := client.Actions()
 	for _, a := range actions {
-		if a.GetVerb() == "get" {
+		if a.GetVerb() == "get" && a.GetResource().Resource == "pods" {
 			getAction, ok := a.(clienttesting.GetAction)
 			if !ok {
 				t.Errorf("Can't cast action object to GetAction interface")