Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-12 21:36:24 +00:00
Remove unnecessary error catch in scheduling failure (#121981)
* Deleted from the cache in the handling of scheduling failures due to missing Node
* Support only `nodes`
* Remove unnecessary error catch
* Fix a build error
* Fix a build error

Signed-off-by: utam0k <k0ma@utam0k.jp>
parent 6fa0fe53a9
commit 01916625da
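For context on the branch this commit deletes: `apierrors.NewNotFound` (the same constructor the removed test injected) populates `Status().Details.Kind` and `Details.Name`, which is what the old `Details.Kind == "node"` check inspected. A minimal standalone sketch, not part of this commit; only the `apierrors` and `v1.Resource` calls are taken from the diff below:

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
)

func main() {
    // Construct the same kind of error the removed test injected for a missing node "foo".
    var err error = apierrors.NewNotFound(v1.Resource("node"), "foo")

    // The deleted branch first classified the error, then read its details.
    if apierrors.IsNotFound(err) {
        if statusErr, ok := err.(apierrors.APIStatus); ok && statusErr.Status().Details != nil {
            d := statusErr.Status().Details
            fmt.Println(d.Kind, d.Name) // prints: node foo
        }
    }
}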
@@ -28,7 +28,6 @@ import (
    "time"

    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    "k8s.io/apimachinery/pkg/util/sets"

@@ -985,20 +984,6 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
        podInfo.UnschedulablePlugins = fitError.Diagnosis.UnschedulablePlugins
        podInfo.PendingPlugins = fitError.Diagnosis.PendingPlugins
        logger.V(2).Info("Unable to schedule pod; no fit; waiting", "pod", klog.KObj(pod), "err", errMsg)
    } else if apierrors.IsNotFound(err) {
        logger.V(2).Info("Unable to schedule pod, possibly due to node not found; waiting", "pod", klog.KObj(pod), "err", errMsg)
        if errStatus, ok := err.(apierrors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
            nodeName := errStatus.Status().Details.Name
            // when node is not found, We do not remove the node right away. Trying again to get
            // the node and if the node is still not found, then remove it from the scheduler cache.
            _, err := fwk.ClientSet().CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
            if err != nil && apierrors.IsNotFound(err) {
                node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
                if err := sched.Cache.RemoveNode(logger, &node); err != nil {
                    logger.V(4).Info("Node is not found; failed to remove it from the cache", "node", node.Name)
                }
            }
        }
    } else {
        logger.Error(err, "Error scheduling pod; retrying", "pod", klog.KObj(pod))
    }

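The re-check performed by the deleted branch above (fetch the node again and only trust a second NotFound before evicting it from the cache) can be reproduced against a fake clientset. A standalone sketch, not scheduler code; `fake.NewSimpleClientset` and the `Nodes()` calls are the same APIs the removed test below relies on:

package main

import (
    "context"
    "fmt"

    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes/fake"
)

func main() {
    ctx := context.Background()

    // A fake API server seeded with a single node "foo".
    client := fake.NewSimpleClientset(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}})

    // Delete it, as the removed test case did for nodeNameToDelete.
    _ = client.CoreV1().Nodes().Delete(ctx, "foo", metav1.DeleteOptions{})

    // The deleted branch re-fetched the node and removed it from the scheduler
    // cache only if this second lookup also came back NotFound.
    _, err := client.CoreV1().Nodes().Get(ctx, "foo", metav1.GetOptions{})
    fmt.Println(apierrors.IsNotFound(err)) // prints: true
}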
@@ -26,10 +26,8 @@ import (

    "github.com/google/go-cmp/cmp"
    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/util/sets"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"

@@ -325,76 +323,6 @@ func TestFailureHandler(t *testing.T) {
    }
}

func TestFailureHandler_NodeNotFound(t *testing.T) {
    nodeFoo := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
    nodeBar := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "bar"}}
    testPod := st.MakePod().Name("test-pod").Namespace(v1.NamespaceDefault).Obj()
    tests := []struct {
        name             string
        nodes            []v1.Node
        nodeNameToDelete string
        injectErr        error
        expectNodeNames  sets.Set[string]
    }{
        {
            name:             "node is deleted during a scheduling cycle",
            nodes:            []v1.Node{*nodeFoo, *nodeBar},
            nodeNameToDelete: "foo",
            injectErr:        apierrors.NewNotFound(v1.Resource("node"), nodeFoo.Name),
            expectNodeNames:  sets.New("bar"),
        },
        {
            name:            "node is not deleted but NodeNotFound is received incorrectly",
            nodes:           []v1.Node{*nodeFoo, *nodeBar},
            injectErr:       apierrors.NewNotFound(v1.Resource("node"), nodeFoo.Name),
            expectNodeNames: sets.New("foo", "bar"),
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            logger, ctx := ktesting.NewTestContext(t)
            ctx, cancel := context.WithCancel(ctx)
            defer cancel()

            client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: tt.nodes})
            informerFactory := informers.NewSharedInformerFactory(client, 0)
            podInformer := informerFactory.Core().V1().Pods()
            // Need to add testPod to the store.
            podInformer.Informer().GetStore().Add(testPod)

            queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())))
            schedulerCache := internalcache.New(ctx, 30*time.Second)

            for i := range tt.nodes {
                node := tt.nodes[i]
                // Add node to schedulerCache no matter it's deleted in API server or not.
                schedulerCache.AddNode(logger, &node)
                if node.Name == tt.nodeNameToDelete {
                    client.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{})
                }
            }

            s, fwk, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
            if err != nil {
                t.Fatal(err)
            }

            testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
            s.FailureHandler(ctx, fwk, testPodInfo, framework.NewStatus(framework.Unschedulable).WithError(tt.injectErr), nil, time.Now())

            gotNodes := schedulerCache.Dump().Nodes
            gotNodeNames := sets.New[string]()
            for _, nodeInfo := range gotNodes {
                gotNodeNames.Insert(nodeInfo.Node().Name)
            }
            if diff := cmp.Diff(tt.expectNodeNames, gotNodeNames); diff != "" {
                t.Errorf("Unexpected nodes (-want, +got): %s", diff)
            }
        })
    }
}

func TestFailureHandler_PodAlreadyBound(t *testing.T) {
    logger, ctx := ktesting.NewTestContext(t)
    ctx, cancel := context.WithCancel(ctx)
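The assertion style used by the deleted test (compare expected and observed node names as `sets.Set[string]` via `cmp.Diff`) also works standalone. A small sketch with made-up node names, assuming only the `go-cmp` and `sets` packages already imported above:

package main

import (
    "fmt"

    "github.com/google/go-cmp/cmp"
    "k8s.io/apimachinery/pkg/util/sets"
)

func main() {
    // Expected vs. observed node names, mirroring the deleted test's check.
    want := sets.New("foo", "bar")
    got := sets.New("bar")

    if diff := cmp.Diff(want, got); diff != "" {
        fmt.Printf("Unexpected nodes (-want, +got): %s", diff)
    }
}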