mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Merge pull request #89463 from fxmumu/feature-scheduler-err-handler
Remove nested if statement and test it
This commit is contained in:
commit
57144b38eb
@ -61,6 +61,7 @@ go_test(
|
||||
embed = [":go_default_library"],
|
||||
deps = [
|
||||
"//pkg/api/testing:go_default_library",
|
||||
"//pkg/apis/core:go_default_library",
|
||||
"//pkg/controller/volume/scheduling:go_default_library",
|
||||
"//pkg/features:go_default_library",
|
||||
"//pkg/scheduler/apis/config:go_default_library",
|
||||
@ -85,6 +86,7 @@ go_test(
|
||||
"//pkg/scheduler/testing:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/api/events/v1beta1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
|
||||
|
@ -461,26 +461,24 @@ func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.Sch
|
||||
pod := podInfo.Pod
|
||||
if err == core.ErrNoNodesAvailable {
|
||||
klog.V(2).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
|
||||
} else {
|
||||
if _, ok := err.(*core.FitError); ok {
|
||||
klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
|
||||
} else if apierrors.IsNotFound(err) {
|
||||
klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err)
|
||||
if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
|
||||
nodeName := errStatus.Status().Details.Name
|
||||
// when node is not found, We do not remove the node right away. Trying again to get
|
||||
// the node and if the node is still not found, then remove it from the scheduler cache.
|
||||
_, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
|
||||
if err != nil && apierrors.IsNotFound(err) {
|
||||
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
|
||||
if err := schedulerCache.RemoveNode(&node); err != nil {
|
||||
klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name)
|
||||
}
|
||||
} else if _, ok := err.(*core.FitError); ok {
|
||||
klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
|
||||
} else if apierrors.IsNotFound(err) {
|
||||
klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err)
|
||||
if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
|
||||
nodeName := errStatus.Status().Details.Name
|
||||
// when node is not found, We do not remove the node right away. Trying again to get
|
||||
// the node and if the node is still not found, then remove it from the scheduler cache.
|
||||
_, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
|
||||
if err != nil && apierrors.IsNotFound(err) {
|
||||
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
|
||||
if err := schedulerCache.RemoveNode(&node); err != nil {
|
||||
klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
|
||||
}
|
||||
} else {
|
||||
klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
|
||||
}
|
||||
|
||||
podSchedulingCycle := podQueue.SchedulingCycle()
|
||||
|
@ -20,6 +20,8 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
apicore "k8s.io/kubernetes/pkg/apis/core"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
@ -318,8 +320,13 @@ func TestDefaultErrorFunc(t *testing.T) {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "bar"},
|
||||
Spec: apitesting.V1DeepEqualSafePodSpec(),
|
||||
}
|
||||
|
||||
nodeBar, nodeFoo :=
|
||||
&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "bar"}},
|
||||
&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
|
||||
|
||||
testPodInfo := &framework.PodInfo{Pod: testPod}
|
||||
client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}})
|
||||
client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{*nodeBar}})
|
||||
stopCh := make(chan struct{})
|
||||
defer close(stopCh)
|
||||
|
||||
@ -328,8 +335,18 @@ func TestDefaultErrorFunc(t *testing.T) {
|
||||
schedulerCache := internalcache.New(30*time.Second, stopCh)
|
||||
errFunc := MakeDefaultErrorFunc(client, queue, schedulerCache)
|
||||
|
||||
// Trigger error handling again to put the pod in unschedulable queue
|
||||
errFunc(testPodInfo, nil)
|
||||
_ = schedulerCache.AddNode(nodeFoo)
|
||||
|
||||
// assume nodeFoo was not found
|
||||
err := apierrors.NewNotFound(apicore.Resource("node"), nodeFoo.Name)
|
||||
errFunc(testPodInfo, err)
|
||||
dump := schedulerCache.Dump()
|
||||
for _, n := range dump.Nodes {
|
||||
if e, a := nodeFoo, n.Node(); reflect.DeepEqual(e, a) {
|
||||
t.Errorf("Node %s is still in schedulerCache", e.Name)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Try up to a minute to retrieve the error pod from priority queue
|
||||
foundPodFlag := false
|
||||
@ -355,6 +372,35 @@ func TestDefaultErrorFunc(t *testing.T) {
|
||||
t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod)
|
||||
}
|
||||
|
||||
_ = queue.Delete(testPod)
|
||||
|
||||
// Trigger error handling again to put the pod in unschedulable queue
|
||||
errFunc(testPodInfo, nil)
|
||||
|
||||
// Try up to a minute to retrieve the error pod from priority queue
|
||||
foundPodFlag = false
|
||||
maxIterations = 10 * 60
|
||||
for i := 0; i < maxIterations; i++ {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
got := getPodfromPriorityQueue(queue, testPod)
|
||||
if got == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
testClientGetPodRequest(client, t, testPod.Namespace, testPod.Name)
|
||||
|
||||
if e, a := testPod, got; !reflect.DeepEqual(e, a) {
|
||||
t.Errorf("Expected %v, got %v", e, a)
|
||||
}
|
||||
|
||||
foundPodFlag = true
|
||||
break
|
||||
}
|
||||
|
||||
if !foundPodFlag {
|
||||
t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod)
|
||||
}
|
||||
|
||||
// Remove the pod from priority queue to test putting error
|
||||
// pod in backoff queue.
|
||||
queue.Delete(testPod)
|
||||
@ -423,7 +469,7 @@ func testClientGetPodRequest(client *fake.Clientset, t *testing.T, podNs string,
|
||||
requestReceived := false
|
||||
actions := client.Actions()
|
||||
for _, a := range actions {
|
||||
if a.GetVerb() == "get" {
|
||||
if a.GetVerb() == "get" && a.GetResource().Resource == "pods" {
|
||||
getAction, ok := a.(clienttesting.GetAction)
|
||||
if !ok {
|
||||
t.Errorf("Can't cast action object to GetAction interface")
|
||||
|
Loading…
Reference in New Issue
Block a user