mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 14:37:00 +00:00
Merge pull request #61445 from resouer/extender-priority
Automatic merge from submit-queue (batch tested with PRs 62063, 62169, 62155, 62139, 61445). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Schedule even if extender is not available when using extender **What this PR does / why we need it**: When using a scheduler extender, if the extender is not available, scheduling of all pods fails. We should let the scheduling happen but display an error message that the extender is failing. `IsIgnorable()` is added to the extender to indicate whether scheduling of all pods should fail when the extender is unavailable. **Backward compatibility:** We use `IsIgnorable` instead of `IsCritical` so that when this flag is not set, the default value will be `false`, i.e. not ignorable, which is consistent with the current behavior in existing extenders. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes: #60616 **Special notes for your reviewer**: kindly cc @ravisantoshgudimetla to see if this meets your expectation TODO: update the examples in kubernetes/examples, but the strategy there is not clear to me for now **Release note**: ```release-note Schedule even if extender is not available when using extender ```
This commit is contained in:
commit
6d8df0c400
@ -64,6 +64,10 @@ type SchedulerExtender interface {
|
|||||||
|
|
||||||
// SupportsPreemption returns if the scheduler extender support preemption or not.
|
// SupportsPreemption returns if the scheduler extender support preemption or not.
|
||||||
SupportsPreemption() bool
|
SupportsPreemption() bool
|
||||||
|
|
||||||
|
// IsIgnorable returns true if scheduling should not fail when this extender
|
||||||
|
// is unavailable. This gives scheduler ability to fail fast and tolerate non-critical extenders as well.
|
||||||
|
IsIgnorable() bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods
|
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods
|
||||||
|
@ -192,6 +192,9 @@ type ExtenderConfig struct {
|
|||||||
// will skip checking the resource in predicates.
|
// will skip checking the resource in predicates.
|
||||||
// +optional
|
// +optional
|
||||||
ManagedResources []ExtenderManagedResource
|
ManagedResources []ExtenderManagedResource
|
||||||
|
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
|
||||||
|
// fail when the extender returns an error or is not reachable.
|
||||||
|
Ignorable bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
|
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
|
||||||
|
@ -174,6 +174,9 @@ type ExtenderConfig struct {
|
|||||||
// will skip checking the resource in predicates.
|
// will skip checking the resource in predicates.
|
||||||
// +optional
|
// +optional
|
||||||
ManagedResources []ExtenderManagedResource `json:"managedResources,omitempty"`
|
ManagedResources []ExtenderManagedResource `json:"managedResources,omitempty"`
|
||||||
|
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
|
||||||
|
// fail when the extender returns an error or is not reachable.
|
||||||
|
Ignorable bool `json:"ignorable,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
|
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
|
||||||
|
@ -49,6 +49,7 @@ type HTTPExtender struct {
|
|||||||
client *http.Client
|
client *http.Client
|
||||||
nodeCacheCapable bool
|
nodeCacheCapable bool
|
||||||
managedResources sets.String
|
managedResources sets.String
|
||||||
|
ignorable bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeTransport(config *schedulerapi.ExtenderConfig) (http.RoundTripper, error) {
|
func makeTransport(config *schedulerapi.ExtenderConfig) (http.RoundTripper, error) {
|
||||||
@ -102,9 +103,16 @@ func NewHTTPExtender(config *schedulerapi.ExtenderConfig) (algorithm.SchedulerEx
|
|||||||
client: client,
|
client: client,
|
||||||
nodeCacheCapable: config.NodeCacheCapable,
|
nodeCacheCapable: config.NodeCacheCapable,
|
||||||
managedResources: managedResources,
|
managedResources: managedResources,
|
||||||
|
ignorable: config.Ignorable,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsIgnorable returns true if scheduling should not fail when this extender
|
||||||
|
// is unavailable
|
||||||
|
func (h *HTTPExtender) IsIgnorable() bool {
|
||||||
|
return h.ignorable
|
||||||
|
}
|
||||||
|
|
||||||
// SupportsPreemption returns if a extender support preemption.
|
// SupportsPreemption returns if a extender support preemption.
|
||||||
// A extender should have preempt verb defined and enabled its own node cache.
|
// A extender should have preempt verb defined and enabled its own node cache.
|
||||||
func (h *HTTPExtender) SupportsPreemption() bool {
|
func (h *HTTPExtender) SupportsPreemption() bool {
|
||||||
@ -147,11 +155,12 @@ func (h *HTTPExtender) ProcessPreemption(
|
|||||||
|
|
||||||
// Extender will always return NodeNameToMetaVictims.
|
// Extender will always return NodeNameToMetaVictims.
|
||||||
// So let's convert it to NodeToVictims by using NodeNameToInfo.
|
// So let's convert it to NodeToVictims by using NodeNameToInfo.
|
||||||
nodeToVictims, err := h.convertToNodeToVictims(result.NodeNameToMetaVictims, nodeNameToInfo)
|
newNodeToVictims, err := h.convertToNodeToVictims(result.NodeNameToMetaVictims, nodeNameToInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return nodeToVictims, nil
|
// Do not override nodeToVictims
|
||||||
|
return newNodeToVictims, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertToNodeToVictims converts "nodeNameToMetaVictims" from object identifiers,
|
// convertToNodeToVictims converts "nodeNameToMetaVictims" from object identifiers,
|
||||||
|
@ -113,12 +113,17 @@ type FakeExtender struct {
|
|||||||
nodeCacheCapable bool
|
nodeCacheCapable bool
|
||||||
filteredNodes []*v1.Node
|
filteredNodes []*v1.Node
|
||||||
unInterested bool
|
unInterested bool
|
||||||
|
ignorable bool
|
||||||
|
|
||||||
// Cached node information for fake extender
|
// Cached node information for fake extender
|
||||||
cachedNodeNameToInfo map[string]*schedulercache.NodeInfo
|
cachedNodeNameToInfo map[string]*schedulercache.NodeInfo
|
||||||
cachedPDBs []*policy.PodDisruptionBudget
|
cachedPDBs []*policy.PodDisruptionBudget
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (f *FakeExtender) IsIgnorable() bool {
|
||||||
|
return f.ignorable
|
||||||
|
}
|
||||||
|
|
||||||
func (f *FakeExtender) SupportsPreemption() bool {
|
func (f *FakeExtender) SupportsPreemption() bool {
|
||||||
// Assume preempt verb is always defined.
|
// Assume preempt verb is always defined.
|
||||||
return true
|
return true
|
||||||
@ -141,7 +146,10 @@ func (f *FakeExtender) ProcessPreemption(
|
|||||||
|
|
||||||
for node, victims := range nodeToVictimsCopy {
|
for node, victims := range nodeToVictimsCopy {
|
||||||
// Try to do preemption on extender side.
|
// Try to do preemption on extender side.
|
||||||
extenderVictimPods, extendernPDBViolations, fits := f.selectVictimsOnNodeByExtender(pod, node, nodeNameToInfo)
|
extenderVictimPods, extendernPDBViolations, fits, err := f.selectVictimsOnNodeByExtender(pod, node, nodeNameToInfo)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
// If it's unfit after extender's preemption, this node is unresolvable by preemption overall,
|
// If it's unfit after extender's preemption, this node is unresolvable by preemption overall,
|
||||||
// let's remove it from potential preemption nodes.
|
// let's remove it from potential preemption nodes.
|
||||||
if !fits {
|
if !fits {
|
||||||
@ -164,15 +172,18 @@ func (f *FakeExtender) selectVictimsOnNodeByExtender(
|
|||||||
pod *v1.Pod,
|
pod *v1.Pod,
|
||||||
node *v1.Node,
|
node *v1.Node,
|
||||||
nodeNameToInfo map[string]*schedulercache.NodeInfo,
|
nodeNameToInfo map[string]*schedulercache.NodeInfo,
|
||||||
) ([]*v1.Pod, int, bool) {
|
) ([]*v1.Pod, int, bool, error) {
|
||||||
// TODO(harry): add more test in generic_scheduler_test.go to verify this logic.
|
|
||||||
// If a extender support preemption but have no cached node info, let's run filter to make sure
|
// If a extender support preemption but have no cached node info, let's run filter to make sure
|
||||||
// default scheduler's decision still stand with given pod and node.
|
// default scheduler's decision still stand with given pod and node.
|
||||||
if !f.nodeCacheCapable {
|
if !f.nodeCacheCapable {
|
||||||
if fits, _ := f.runPredicate(pod, node); !fits {
|
fits, err := f.runPredicate(pod, node)
|
||||||
return nil, 0, false
|
if err != nil {
|
||||||
|
return nil, 0, false, err
|
||||||
}
|
}
|
||||||
return []*v1.Pod{}, 0, true
|
if !fits {
|
||||||
|
return nil, 0, false, nil
|
||||||
|
}
|
||||||
|
return []*v1.Pod{}, 0, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, as a extender support preemption and have cached node info, we will assume cachedNodeNameToInfo is available
|
// Otherwise, as a extender support preemption and have cached node info, we will assume cachedNodeNameToInfo is available
|
||||||
@ -200,8 +211,12 @@ func (f *FakeExtender) selectVictimsOnNodeByExtender(
|
|||||||
|
|
||||||
// If the new pod does not fit after removing all the lower priority pods,
|
// If the new pod does not fit after removing all the lower priority pods,
|
||||||
// we are almost done and this node is not suitable for preemption.
|
// we are almost done and this node is not suitable for preemption.
|
||||||
if fits, _ := f.runPredicate(pod, nodeInfoCopy.Node()); !fits {
|
fits, err := f.runPredicate(pod, nodeInfoCopy.Node())
|
||||||
return nil, 0, false
|
if err != nil {
|
||||||
|
return nil, 0, false, err
|
||||||
|
}
|
||||||
|
if !fits {
|
||||||
|
return nil, 0, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var victims []*v1.Pod
|
var victims []*v1.Pod
|
||||||
@ -225,7 +240,7 @@ func (f *FakeExtender) selectVictimsOnNodeByExtender(
|
|||||||
reprievePod(p.(*v1.Pod))
|
reprievePod(p.(*v1.Pod))
|
||||||
}
|
}
|
||||||
|
|
||||||
return victims, numViolatingVictim, true
|
return victims, numViolatingVictim, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// runPredicate run predicates of extender one by one for given pod and node.
|
// runPredicate run predicates of extender one by one for given pod and node.
|
||||||
@ -439,7 +454,7 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
|
|||||||
// Filter/Prioritize phases if the extender is not interested in
|
// Filter/Prioritize phases if the extender is not interested in
|
||||||
// the pod.
|
// the pod.
|
||||||
//
|
//
|
||||||
// If scheduler sends the pod by mistake, the test will fail
|
// If scheduler sends the pod by mistake, the test would fail
|
||||||
// because of the errors from errorPredicateExtender and/or
|
// because of the errors from errorPredicateExtender and/or
|
||||||
// errorPrioritizerExtender.
|
// errorPrioritizerExtender.
|
||||||
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
|
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
|
||||||
@ -456,6 +471,28 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
|
|||||||
expectedHost: "machine2", // machine2 has higher score
|
expectedHost: "machine2", // machine2 has higher score
|
||||||
name: "test 8",
|
name: "test 8",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
// Scheduling is expected to not fail in
|
||||||
|
// Filter/Prioritize phases if the extender is not available and ignorable.
|
||||||
|
//
|
||||||
|
// If scheduler did not ignore the extender, the test would fail
|
||||||
|
// because of the errors from errorPredicateExtender.
|
||||||
|
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
|
||||||
|
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
|
||||||
|
extenders: []FakeExtender{
|
||||||
|
{
|
||||||
|
predicates: []fitPredicate{errorPredicateExtender},
|
||||||
|
ignorable: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
predicates: []fitPredicate{machine1PredicateExtender},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
nodes: []string{"machine1", "machine2"},
|
||||||
|
expectsErr: false,
|
||||||
|
expectedHost: "machine1",
|
||||||
|
name: "test 9",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
@ -269,12 +269,25 @@ func (g *genericScheduler) processPreemptionWithExtenders(
|
|||||||
if len(nodeToVictims) > 0 {
|
if len(nodeToVictims) > 0 {
|
||||||
for _, extender := range g.extenders {
|
for _, extender := range g.extenders {
|
||||||
if extender.SupportsPreemption() {
|
if extender.SupportsPreemption() {
|
||||||
var err error
|
newNodeToVictims, err := extender.ProcessPreemption(
|
||||||
// Replace nodeToVictims with result after preemption from extender.
|
pod,
|
||||||
if nodeToVictims, err = extender.ProcessPreemption(pod, nodeToVictims, g.cachedNodeInfoMap); err != nil {
|
nodeToVictims,
|
||||||
|
g.cachedNodeInfoMap,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
if extender.IsIgnorable() {
|
||||||
|
glog.Warningf("Skipping extender %v as it returned error %v and has ignorable flag set",
|
||||||
|
extender, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// If node list is empty, no preemption will happen, skip other extenders.
|
|
||||||
|
// Replace nodeToVictims with new result after preemption. So the
|
||||||
|
// rest of extenders can continue use it as parameter.
|
||||||
|
nodeToVictims = newNodeToVictims
|
||||||
|
|
||||||
|
// If node list becomes empty, no preemption can happen regardless of other extenders.
|
||||||
if len(nodeToVictims) == 0 {
|
if len(nodeToVictims) == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -384,7 +397,13 @@ func findNodesThatFit(
|
|||||||
}
|
}
|
||||||
filteredList, failedMap, err := extender.Filter(pod, filtered, nodeNameToInfo)
|
filteredList, failedMap, err := extender.Filter(pod, filtered, nodeNameToInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return []*v1.Node{}, FailedPredicateMap{}, err
|
if extender.IsIgnorable() {
|
||||||
|
glog.Warningf("Skipping extender %v as it returned error %v and has ignorable flag set",
|
||||||
|
extender, err)
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
return []*v1.Node{}, FailedPredicateMap{}, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for failedNodeName, failedMsg := range failedMap {
|
for failedNodeName, failedMsg := range failedMap {
|
||||||
|
@ -1274,6 +1274,30 @@ func TestPreempt(t *testing.T) {
|
|||||||
expectedNode: "",
|
expectedNode: "",
|
||||||
expectedPods: []string{},
|
expectedPods: []string{},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "One scheduler extender allows only machine1, the other returns error but ignorable. Only machine1 would be chosen",
|
||||||
|
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
|
||||||
|
Containers: veryLargeContainers,
|
||||||
|
Priority: &highPriority},
|
||||||
|
},
|
||||||
|
pods: []*v1.Pod{
|
||||||
|
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
|
||||||
|
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
|
||||||
|
|
||||||
|
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
|
||||||
|
},
|
||||||
|
extenders: []*FakeExtender{
|
||||||
|
{
|
||||||
|
predicates: []fitPredicate{errorPredicateExtender},
|
||||||
|
ignorable: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
predicates: []fitPredicate{machine1PredicateExtender},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedNode: "machine1",
|
||||||
|
expectedPods: []string{"m1.1", "m1.2"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
@ -539,6 +539,11 @@ func newConfigFactory(client *clientset.Clientset, hardPodAffinitySymmetricWeigh
|
|||||||
type fakeExtender struct {
|
type fakeExtender struct {
|
||||||
isBinder bool
|
isBinder bool
|
||||||
interestedPodName string
|
interestedPodName string
|
||||||
|
ignorable bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeExtender) IsIgnorable() bool {
|
||||||
|
return f.ignorable
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *fakeExtender) ProcessPreemption(
|
func (f *fakeExtender) ProcessPreemption(
|
||||||
|
Loading…
Reference in New Issue
Block a user