mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-05 02:09:56 +00:00
bug(nodeunschedulable): register missing Pod event for NodeUnschedulable plugin
This commit is contained in:
parent
048c8536d6
commit
495981974e
@ -51,12 +51,56 @@ const (
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *NodeUnschedulable) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// preCheck is not used when QHint is enabled.
|
||||
if !pl.enableSchedulingQueueHint {
|
||||
return []framework.ClusterEventWithHint{
|
||||
// A note about UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeTaint because UpdateNodeLabel will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// When QueueingHint is enabled, we don't use preCheck and we don't need to register UpdateNodeLabel event.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
// When the QueueingHint feature is enabled,
|
||||
// the scheduling queue uses Pod/Update Queueing Hint
|
||||
// to determine whether a Pod's update makes the Pod schedulable or not.
|
||||
// https://github.com/kubernetes/kubernetes/pull/122234
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodTolerations}, QueueingHintFn: pl.isSchedulableAfterPodTolerationChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodTolerationChange is invoked whenever a pod's toleration changed.
|
||||
func (pl *NodeUnschedulable) isSchedulableAfterPodTolerationChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if pod.UID == modifiedPod.UID {
|
||||
// Note: we don't need to check oldPod tolerations the taint because:
|
||||
// - Taint can be added, but can't be modified nor removed.
|
||||
// - If the Pod already has the toleration, it shouldn't have rejected by this plugin in the first place.
|
||||
// Meaning, here this Pod has been rejected by this plugin, and hence it shouldn't have the toleration yet.
|
||||
if v1helper.TolerationsTolerateTaint(modifiedPod.Spec.Tolerations, &v1.Taint{
|
||||
Key: v1.TaintNodeUnschedulable,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
}) {
|
||||
// This update makes the pod tolerate the unschedulable taint.
|
||||
logger.V(5).Info("a new toleration is added for the unschedulable Pod, and it may make it schedulable", "pod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
logger.V(5).Info("a new toleration is added for the unschedulable Pod, but it's an unrelated toleration", "pod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("a new toleration is added for a Pod, but it's an unrelated Pod and wouldn't change the TaintToleration plugin's decision", "pod", klog.KObj(modifiedPod))
|
||||
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterNodeChange is invoked for all node events reported by
|
||||
// an informer. It checks whether that change made a previously unschedulable
|
||||
// pod schedulable.
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
st "k8s.io/kubernetes/pkg/scheduler/testing"
|
||||
"k8s.io/kubernetes/test/utils/ktesting"
|
||||
)
|
||||
|
||||
@ -96,14 +97,14 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "backoff-wrong-new-object",
|
||||
name: "error-wrong-new-object",
|
||||
pod: &v1.Pod{},
|
||||
newObj: "not-a-node",
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "backoff-wrong-old-object",
|
||||
name: "error-wrong-old-object",
|
||||
pod: &v1.Pod{},
|
||||
newObj: &v1.Node{
|
||||
Spec: v1.NodeSpec{
|
||||
@ -186,3 +187,64 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsSchedulableAfterPodTolerationChange(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
pod *v1.Pod
|
||||
oldObj, newObj interface{}
|
||||
expectedHint framework.QueueingHint
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "error-wrong-new-object",
|
||||
pod: &v1.Pod{},
|
||||
newObj: "not-a-pod",
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "error-wrong-old-object",
|
||||
pod: &v1.Pod{},
|
||||
newObj: &v1.Pod{},
|
||||
oldObj: "not-a-pod",
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "skip-different-pod-update",
|
||||
pod: st.MakePod().UID("uid").Obj(),
|
||||
newObj: st.MakePod().UID("different-uid").Toleration(v1.TaintNodeUnschedulable).Obj(),
|
||||
oldObj: st.MakePod().UID("different-uid").Obj(),
|
||||
expectedHint: framework.QueueSkip,
|
||||
},
|
||||
{
|
||||
name: "queue-when-the-unsched-pod-gets-toleration",
|
||||
pod: st.MakePod().UID("uid").Obj(),
|
||||
newObj: st.MakePod().UID("uid").Toleration(v1.TaintNodeUnschedulable).Obj(),
|
||||
oldObj: st.MakePod().UID("uid").Obj(),
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
{
|
||||
name: "skip-when-the-unsched-pod-gets-unrelated-toleration",
|
||||
pod: st.MakePod().UID("uid").Obj(),
|
||||
newObj: st.MakePod().UID("uid").Toleration("unrelated-key").Obj(),
|
||||
oldObj: st.MakePod().UID("uid").Obj(),
|
||||
expectedHint: framework.QueueSkip,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
pl := &NodeUnschedulable{}
|
||||
got, err := pl.isSchedulableAfterPodTolerationChange(logger, testCase.pod, testCase.oldObj, testCase.newObj)
|
||||
if err != nil && !testCase.expectedErr {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if got != testCase.expectedHint {
|
||||
t.Errorf("isSchedulableAfterNodeChange() = %v, want %v", got, testCase.expectedHint)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -69,7 +69,8 @@ const (
|
||||
UpdatePodLabel
|
||||
// UpdatePodScaleDown is an update for pod's scale down (i.e., any resource request is reduced).
|
||||
UpdatePodScaleDown
|
||||
// UpdatePodTolerations is an update for pod's tolerations.
|
||||
// UpdatePodTolerations is an addition for pod's tolerations.
|
||||
// (Due to API validation, we can add, but cannot modify or remove tolerations.)
|
||||
UpdatePodTolerations
|
||||
// UpdatePodSchedulingGatesEliminated is an update for pod's scheduling gates, which eliminates all scheduling gates in the Pod.
|
||||
UpdatePodSchedulingGatesEliminated
|
||||
|
Loading…
Reference in New Issue
Block a user