mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-18 16:21:13 +00:00
Merge pull request #50028 from julia-stripe/fix-incorrect-scheduler-bind-call
Automatic merge from submit-queue
Fix incorrect call to 'bind' in scheduler
I previously submitted https://github.com/kubernetes/kubernetes/pull/49661 -- I'm not sure if that PR is too big or what, but this is an attempt at a smaller PR that makes progress on the same issue and is easier to review.
**What this PR does / why we need it**:
In this refactor (https://github.com/kubernetes/kubernetes/commit/ecb962e6585#diff-67f2b61521299ca8d8687b0933bbfb19R223) the scheduler code was refactored into separate `bind` and `assume` functions. When that happened, `bind` was called with `pod` as an argument. The argument to `bind` should be the assumed pod, not the original pod. Evidence that `assumedPod` is the correct argument bind and not `pod`: 80f26fa8a8/plugin/pkg/scheduler/scheduler.go (L229-L234)
. (and it says `assumed` in the function signature for `bind`, even though it's not called with the assumed pod as an argument).
This is an issue (and causes #49314, where pods that fail to bind to a node get stuck indefinitely) in the following scenario:
1. The pod fails to bind to the node
2. `bind` calls `ForgetPod` with the `pod` argument
3. since `ForgetPod` is expecting the assumed pod as an argument (because that's what's in the scheduler cache), it fails with an error like `scheduler cache ForgetPod failed: pod test-677550-rc-edit-namespace/nginx-jvn09 state was assumed on a different node`
4. The pod gets lost forever because of some incomplete error handling (which I haven't addressed here in the interest of making a simpler PR)
In this PR I've fixed the call to `bind` and modified the tests to make sure that `ForgetPod` gets called with the correct argument (the assumed pod) when binding fails.
**Which issue this PR fixes**: fixes #49314
**Special notes for your reviewer**:
**Release note**:
```release-note
```
This commit is contained in:
commit
898b1b3330
@ -185,14 +185,14 @@ func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
|
// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
|
||||||
func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
|
// assume modifies `assumed`.
|
||||||
|
func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
|
||||||
// Optimistically assume that the binding will succeed and send it to apiserver
|
// Optimistically assume that the binding will succeed and send it to apiserver
|
||||||
// in the background.
|
// in the background.
|
||||||
// If the binding fails, scheduler will release resources allocated to assumed pod
|
// If the binding fails, scheduler will release resources allocated to assumed pod
|
||||||
// immediately.
|
// immediately.
|
||||||
assumed := *pod
|
|
||||||
assumed.Spec.NodeName = host
|
assumed.Spec.NodeName = host
|
||||||
if err := sched.config.SchedulerCache.AssumePod(&assumed); err != nil {
|
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
|
||||||
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
||||||
// TODO: This means that a given pod is already in cache (which means it
|
// TODO: This means that a given pod is already in cache (which means it
|
||||||
// is either assumed or already added). This is most probably result of a
|
// is either assumed or already added). This is most probably result of a
|
||||||
@ -207,7 +207,7 @@ func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
|
|||||||
// predicates in equivalence cache.
|
// predicates in equivalence cache.
|
||||||
// If the binding fails, these invalidated item will not break anything.
|
// If the binding fails, these invalidated item will not break anything.
|
||||||
if sched.config.Ecache != nil {
|
if sched.config.Ecache != nil {
|
||||||
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, host)
|
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(assumed, host)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -264,15 +264,17 @@ func (sched *Scheduler) scheduleOne() {
|
|||||||
|
|
||||||
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
|
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
|
||||||
// This allows us to keep scheduling without waiting on binding to occur.
|
// This allows us to keep scheduling without waiting on binding to occur.
|
||||||
err = sched.assume(pod, suggestedHost)
|
assumedPod := *pod
|
||||||
|
// assume modifies `assumedPod` by setting NodeName=suggestedHost
|
||||||
|
err = sched.assume(&assumedPod, suggestedHost)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
|
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
|
||||||
go func() {
|
go func() {
|
||||||
err := sched.bind(pod, &v1.Binding{
|
err := sched.bind(&assumedPod, &v1.Binding{
|
||||||
ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name, UID: pod.UID},
|
ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
|
||||||
Target: v1.ObjectReference{
|
Target: v1.ObjectReference{
|
||||||
Kind: "Node",
|
Kind: "Node",
|
||||||
Name: suggestedHost,
|
Name: suggestedHost,
|
||||||
|
@ -115,6 +115,7 @@ func TestScheduler(t *testing.T) {
|
|||||||
sendPod *v1.Pod
|
sendPod *v1.Pod
|
||||||
algo algorithm.ScheduleAlgorithm
|
algo algorithm.ScheduleAlgorithm
|
||||||
expectErrorPod *v1.Pod
|
expectErrorPod *v1.Pod
|
||||||
|
expectForgetPod *v1.Pod
|
||||||
expectAssumedPod *v1.Pod
|
expectAssumedPod *v1.Pod
|
||||||
expectError error
|
expectError error
|
||||||
expectBind *v1.Binding
|
expectBind *v1.Binding
|
||||||
@ -139,7 +140,8 @@ func TestScheduler(t *testing.T) {
|
|||||||
expectAssumedPod: podWithID("foo", testNode.Name),
|
expectAssumedPod: podWithID("foo", testNode.Name),
|
||||||
injectBindError: errB,
|
injectBindError: errB,
|
||||||
expectError: errB,
|
expectError: errB,
|
||||||
expectErrorPod: podWithID("foo", ""),
|
expectErrorPod: podWithID("foo", testNode.Name),
|
||||||
|
expectForgetPod: podWithID("foo", testNode.Name),
|
||||||
eventReason: "FailedScheduling",
|
eventReason: "FailedScheduling",
|
||||||
}, {
|
}, {
|
||||||
sendPod: deletingPod("foo"),
|
sendPod: deletingPod("foo"),
|
||||||
@ -151,11 +153,15 @@ func TestScheduler(t *testing.T) {
|
|||||||
for i, item := range table {
|
for i, item := range table {
|
||||||
var gotError error
|
var gotError error
|
||||||
var gotPod *v1.Pod
|
var gotPod *v1.Pod
|
||||||
|
var gotForgetPod *v1.Pod
|
||||||
var gotAssumedPod *v1.Pod
|
var gotAssumedPod *v1.Pod
|
||||||
var gotBinding *v1.Binding
|
var gotBinding *v1.Binding
|
||||||
configurator := &FakeConfigurator{
|
configurator := &FakeConfigurator{
|
||||||
Config: &Config{
|
Config: &Config{
|
||||||
SchedulerCache: &schedulertesting.FakeCache{
|
SchedulerCache: &schedulertesting.FakeCache{
|
||||||
|
ForgetFunc: func(pod *v1.Pod) {
|
||||||
|
gotForgetPod = pod
|
||||||
|
},
|
||||||
AssumeFunc: func(pod *v1.Pod) {
|
AssumeFunc: func(pod *v1.Pod) {
|
||||||
gotAssumedPod = pod
|
gotAssumedPod = pod
|
||||||
},
|
},
|
||||||
@ -196,6 +202,9 @@ func TestScheduler(t *testing.T) {
|
|||||||
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
|
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
|
||||||
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
|
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
|
||||||
}
|
}
|
||||||
|
if e, a := item.expectForgetPod, gotForgetPod; !reflect.DeepEqual(e, a) {
|
||||||
|
t.Errorf("%v: forget pod: wanted %v, got %v", i, e, a)
|
||||||
|
}
|
||||||
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
|
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
|
||||||
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
|
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,7 @@ import (
|
|||||||
// FakeCache is used for testing
|
// FakeCache is used for testing
|
||||||
type FakeCache struct {
|
type FakeCache struct {
|
||||||
AssumeFunc func(*v1.Pod)
|
AssumeFunc func(*v1.Pod)
|
||||||
|
ForgetFunc func(*v1.Pod)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *FakeCache) AssumePod(pod *v1.Pod) error {
|
func (f *FakeCache) AssumePod(pod *v1.Pod) error {
|
||||||
@ -34,7 +35,10 @@ func (f *FakeCache) AssumePod(pod *v1.Pod) error {
|
|||||||
|
|
||||||
func (f *FakeCache) FinishBinding(pod *v1.Pod) error { return nil }
|
func (f *FakeCache) FinishBinding(pod *v1.Pod) error { return nil }
|
||||||
|
|
||||||
func (f *FakeCache) ForgetPod(pod *v1.Pod) error { return nil }
|
func (f *FakeCache) ForgetPod(pod *v1.Pod) error {
|
||||||
|
f.ForgetFunc(pod)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (f *FakeCache) AddPod(pod *v1.Pod) error { return nil }
|
func (f *FakeCache) AddPod(pod *v1.Pod) error { return nil }
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user