Remove unnecessary error catch in scheduling failure (#121981)

* Deleted from the cache in the handling of scheduling failures due to missing Node

Signed-off-by: utam0k <k0ma@utam0k.jp>

* Support only `nodes`

* Remove unnecessary error catch

Signed-off-by: utam0k <k0ma@utam0k.jp>

* Fix a build error

Signed-off-by: utam0k <k0ma@utam0k.jp>

* Fix a build error

Signed-off-by: utam0k <k0ma@utam0k.jp>

---------

Signed-off-by: utam0k <k0ma@utam0k.jp>
Toru Komatsu 2023-12-14 13:09:08 +09:00 committed by GitHub
parent 6fa0fe53a9
commit 01916625da
2 changed files with 0 additions and 87 deletions
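Before this change, handleSchedulingFailure had a dedicated branch for NotFound errors on the node resource: it re-fetched the node from the API server and, only if the node was still missing, dropped it from the scheduler cache. A condensed sketch of that branch, paraphrased from the first hunk below (identifiers follow the removed code; surrounding branches and error logging are omitted):

if apierrors.IsNotFound(err) {
	// The scheduling failure names a node the API server reports as gone.
	if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
		nodeName := errStatus.Status().Details.Name
		// Double-check with the API server; only if the node is still not
		// found is it removed from the scheduler cache.
		_, err := fwk.ClientSet().CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
		if err != nil && apierrors.IsNotFound(err) {
			node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
			_ = sched.Cache.RemoveNode(logger, &node)
		}
	}
}

This commit deletes that branch, and the test that exercised it, so such errors now fall through to the generic "retrying" path kept in the hunk below.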

pkg/scheduler/schedule_one.go

@@ -28,7 +28,6 @@ import (
"time"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
@@ -985,20 +984,6 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
podInfo.UnschedulablePlugins = fitError.Diagnosis.UnschedulablePlugins
podInfo.PendingPlugins = fitError.Diagnosis.PendingPlugins
logger.V(2).Info("Unable to schedule pod; no fit; waiting", "pod", klog.KObj(pod), "err", errMsg)
} else if apierrors.IsNotFound(err) {
logger.V(2).Info("Unable to schedule pod, possibly due to node not found; waiting", "pod", klog.KObj(pod), "err", errMsg)
if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
nodeName := errStatus.Status().Details.Name
// When the node is not found, we do not remove it from the cache right away. We try to get
// the node again, and only if it is still not found do we remove it from the scheduler cache.
_, err := fwk.ClientSet().CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil && apierrors.IsNotFound(err) {
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
if err := sched.Cache.RemoveNode(logger, &node); err != nil {
logger.V(4).Info("Node is not found; failed to remove it from the cache", "node", node.Name)
}
}
}
} else {
logger.Error(err, "Error scheduling pod; retrying", "pod", klog.KObj(pod))
}
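For readers following the hunk above: after the IsNotFound branch is removed, the tail of the error handling in handleSchedulingFailure reduces to roughly the following shape. This is a sketch assembled from the context lines kept in the hunk; the FitError branch condition is paraphrased from the surrounding code and should be treated as an assumption, and earlier branches of the function are omitted:

} else if fitError, ok := err.(*framework.FitError); ok {
	// Record which plugins rejected the pod; the queue uses this signal when
	// deciding whether an incoming cluster event makes the pod worth retrying.
	podInfo.UnschedulablePlugins = fitError.Diagnosis.UnschedulablePlugins
	podInfo.PendingPlugins = fitError.Diagnosis.PendingPlugins
	logger.V(2).Info("Unable to schedule pod; no fit; waiting", "pod", klog.KObj(pod), "err", errMsg)
} else {
	// All other errors, including a NotFound for the node, now fall through
	// to this generic retry path; the scheduler cache is no longer touched here.
	logger.Error(err, "Error scheduling pod; retrying", "pod", klog.KObj(pod))
}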

pkg/scheduler/schedule_one_test.go

@@ -26,10 +26,8 @@ import (
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
@@ -325,76 +323,6 @@ func TestFailureHandler(t *testing.T) {
}
}
func TestFailureHandler_NodeNotFound(t *testing.T) {
nodeFoo := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
nodeBar := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "bar"}}
testPod := st.MakePod().Name("test-pod").Namespace(v1.NamespaceDefault).Obj()
tests := []struct {
name string
nodes []v1.Node
nodeNameToDelete string
injectErr error
expectNodeNames sets.Set[string]
}{
{
name: "node is deleted during a scheduling cycle",
nodes: []v1.Node{*nodeFoo, *nodeBar},
nodeNameToDelete: "foo",
injectErr: apierrors.NewNotFound(v1.Resource("node"), nodeFoo.Name),
expectNodeNames: sets.New("bar"),
},
{
name: "node is not deleted but NodeNotFound is received incorrectly",
nodes: []v1.Node{*nodeFoo, *nodeBar},
injectErr: apierrors.NewNotFound(v1.Resource("node"), nodeFoo.Name),
expectNodeNames: sets.New("foo", "bar"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: tt.nodes})
informerFactory := informers.NewSharedInformerFactory(client, 0)
podInformer := informerFactory.Core().V1().Pods()
// Need to add testPod to the store.
podInformer.Informer().GetStore().Add(testPod)
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())))
schedulerCache := internalcache.New(ctx, 30*time.Second)
for i := range tt.nodes {
node := tt.nodes[i]
// Add the node to schedulerCache whether or not it has been deleted from the API server.
schedulerCache.AddNode(logger, &node)
if node.Name == tt.nodeNameToDelete {
client.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{})
}
}
s, fwk, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
if err != nil {
t.Fatal(err)
}
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
s.FailureHandler(ctx, fwk, testPodInfo, framework.NewStatus(framework.Unschedulable).WithError(tt.injectErr), nil, time.Now())
gotNodes := schedulerCache.Dump().Nodes
gotNodeNames := sets.New[string]()
for _, nodeInfo := range gotNodes {
gotNodeNames.Insert(nodeInfo.Node().Name)
}
if diff := cmp.Diff(tt.expectNodeNames, gotNodeNames); diff != "" {
t.Errorf("Unexpected nodes (-want, +got): %s", diff)
}
})
}
}
func TestFailureHandler_PodAlreadyBound(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)