mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-05 18:24:07 +00:00
bug(nodeunschedulable): register missing Pod event for NodeUnschedulable plugin
This commit is contained in:
parent
048c8536d6
commit
495981974e
@ -51,12 +51,56 @@ const (
|
|||||||
// EventsToRegister returns the possible events that may make a Pod
|
// EventsToRegister returns the possible events that may make a Pod
|
||||||
// failed by this plugin schedulable.
|
// failed by this plugin schedulable.
|
||||||
func (pl *NodeUnschedulable) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
func (pl *NodeUnschedulable) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||||
// preCheck is not used when QHint is enabled.
|
if !pl.enableSchedulingQueueHint {
|
||||||
|
return []framework.ClusterEventWithHint{
|
||||||
|
// A note about UpdateNodeLabel event:
|
||||||
|
// Ideally, it's supposed to register only Add | UpdateNodeTaint because UpdateNodeLabel will never change the result from this plugin.
|
||||||
|
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||||
|
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||||
|
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
return []framework.ClusterEventWithHint{
|
return []framework.ClusterEventWithHint{
|
||||||
|
// When QueueingHint is enabled, we don't use preCheck and we don't need to register UpdateNodeLabel event.
|
||||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||||
|
// When the QueueingHint feature is enabled,
|
||||||
|
// the scheduling queue uses Pod/Update Queueing Hint
|
||||||
|
// to determine whether a Pod's update makes the Pod schedulable or not.
|
||||||
|
// https://github.com/kubernetes/kubernetes/pull/122234
|
||||||
|
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodTolerations}, QueueingHintFn: pl.isSchedulableAfterPodTolerationChange},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isSchedulableAfterPodTolerationChange is invoked whenever a pod's toleration changed.
|
||||||
|
func (pl *NodeUnschedulable) isSchedulableAfterPodTolerationChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||||
|
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||||
|
if err != nil {
|
||||||
|
return framework.Queue, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if pod.UID == modifiedPod.UID {
|
||||||
|
// Note: we don't need to check oldPod tolerations the taint because:
|
||||||
|
// - Taint can be added, but can't be modified nor removed.
|
||||||
|
// - If the Pod already has the toleration, it shouldn't have rejected by this plugin in the first place.
|
||||||
|
// Meaning, here this Pod has been rejected by this plugin, and hence it shouldn't have the toleration yet.
|
||||||
|
if v1helper.TolerationsTolerateTaint(modifiedPod.Spec.Tolerations, &v1.Taint{
|
||||||
|
Key: v1.TaintNodeUnschedulable,
|
||||||
|
Effect: v1.TaintEffectNoSchedule,
|
||||||
|
}) {
|
||||||
|
// This update makes the pod tolerate the unschedulable taint.
|
||||||
|
logger.V(5).Info("a new toleration is added for the unschedulable Pod, and it may make it schedulable", "pod", klog.KObj(modifiedPod))
|
||||||
|
return framework.Queue, nil
|
||||||
|
}
|
||||||
|
logger.V(5).Info("a new toleration is added for the unschedulable Pod, but it's an unrelated toleration", "pod", klog.KObj(modifiedPod))
|
||||||
|
return framework.QueueSkip, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.V(5).Info("a new toleration is added for a Pod, but it's an unrelated Pod and wouldn't change the TaintToleration plugin's decision", "pod", klog.KObj(modifiedPod))
|
||||||
|
|
||||||
|
return framework.QueueSkip, nil
|
||||||
|
}
|
||||||
|
|
||||||
// isSchedulableAfterNodeChange is invoked for all node events reported by
|
// isSchedulableAfterNodeChange is invoked for all node events reported by
|
||||||
// an informer. It checks whether that change made a previously unschedulable
|
// an informer. It checks whether that change made a previously unschedulable
|
||||||
// pod schedulable.
|
// pod schedulable.
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||||
|
st "k8s.io/kubernetes/pkg/scheduler/testing"
|
||||||
"k8s.io/kubernetes/test/utils/ktesting"
|
"k8s.io/kubernetes/test/utils/ktesting"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -96,14 +97,14 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
|||||||
expectedErr bool
|
expectedErr bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "backoff-wrong-new-object",
|
name: "error-wrong-new-object",
|
||||||
pod: &v1.Pod{},
|
pod: &v1.Pod{},
|
||||||
newObj: "not-a-node",
|
newObj: "not-a-node",
|
||||||
expectedHint: framework.Queue,
|
expectedHint: framework.Queue,
|
||||||
expectedErr: true,
|
expectedErr: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "backoff-wrong-old-object",
|
name: "error-wrong-old-object",
|
||||||
pod: &v1.Pod{},
|
pod: &v1.Pod{},
|
||||||
newObj: &v1.Node{
|
newObj: &v1.Node{
|
||||||
Spec: v1.NodeSpec{
|
Spec: v1.NodeSpec{
|
||||||
@ -186,3 +187,64 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsSchedulableAfterPodTolerationChange(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
pod *v1.Pod
|
||||||
|
oldObj, newObj interface{}
|
||||||
|
expectedHint framework.QueueingHint
|
||||||
|
expectedErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "error-wrong-new-object",
|
||||||
|
pod: &v1.Pod{},
|
||||||
|
newObj: "not-a-pod",
|
||||||
|
expectedHint: framework.Queue,
|
||||||
|
expectedErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "error-wrong-old-object",
|
||||||
|
pod: &v1.Pod{},
|
||||||
|
newObj: &v1.Pod{},
|
||||||
|
oldObj: "not-a-pod",
|
||||||
|
expectedHint: framework.Queue,
|
||||||
|
expectedErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "skip-different-pod-update",
|
||||||
|
pod: st.MakePod().UID("uid").Obj(),
|
||||||
|
newObj: st.MakePod().UID("different-uid").Toleration(v1.TaintNodeUnschedulable).Obj(),
|
||||||
|
oldObj: st.MakePod().UID("different-uid").Obj(),
|
||||||
|
expectedHint: framework.QueueSkip,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "queue-when-the-unsched-pod-gets-toleration",
|
||||||
|
pod: st.MakePod().UID("uid").Obj(),
|
||||||
|
newObj: st.MakePod().UID("uid").Toleration(v1.TaintNodeUnschedulable).Obj(),
|
||||||
|
oldObj: st.MakePod().UID("uid").Obj(),
|
||||||
|
expectedHint: framework.Queue,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "skip-when-the-unsched-pod-gets-unrelated-toleration",
|
||||||
|
pod: st.MakePod().UID("uid").Obj(),
|
||||||
|
newObj: st.MakePod().UID("uid").Toleration("unrelated-key").Obj(),
|
||||||
|
oldObj: st.MakePod().UID("uid").Obj(),
|
||||||
|
expectedHint: framework.QueueSkip,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.name, func(t *testing.T) {
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
|
pl := &NodeUnschedulable{}
|
||||||
|
got, err := pl.isSchedulableAfterPodTolerationChange(logger, testCase.pod, testCase.oldObj, testCase.newObj)
|
||||||
|
if err != nil && !testCase.expectedErr {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if got != testCase.expectedHint {
|
||||||
|
t.Errorf("isSchedulableAfterNodeChange() = %v, want %v", got, testCase.expectedHint)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -69,7 +69,8 @@ const (
|
|||||||
UpdatePodLabel
|
UpdatePodLabel
|
||||||
// UpdatePodScaleDown is an update for pod's scale down (i.e., any resource request is reduced).
|
// UpdatePodScaleDown is an update for pod's scale down (i.e., any resource request is reduced).
|
||||||
UpdatePodScaleDown
|
UpdatePodScaleDown
|
||||||
// UpdatePodTolerations is an update for pod's tolerations.
|
// UpdatePodTolerations is an addition for pod's tolerations.
|
||||||
|
// (Due to API validation, we can add, but cannot modify or remove tolerations.)
|
||||||
UpdatePodTolerations
|
UpdatePodTolerations
|
||||||
// UpdatePodSchedulingGatesEliminated is an update for pod's scheduling gates, which eliminates all scheduling gates in the Pod.
|
// UpdatePodSchedulingGatesEliminated is an update for pod's scheduling gates, which eliminates all scheduling gates in the Pod.
|
||||||
UpdatePodSchedulingGatesEliminated
|
UpdatePodSchedulingGatesEliminated
|
||||||
|
Loading…
Reference in New Issue
Block a user