mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-18 16:21:13 +00:00
Merge pull request #50028 from julia-stripe/fix-incorrect-scheduler-bind-call
Automatic merge from submit-queue
Fix incorrect call to 'bind' in scheduler
I previously submitted https://github.com/kubernetes/kubernetes/pull/49661 -- I'm not sure if that PR is too big or what, but this is an attempt at a smaller PR that makes progress on the same issue and is easier to review.
**What this PR does / why we need it**:
In this refactor (https://github.com/kubernetes/kubernetes/commit/ecb962e6585#diff-67f2b61521299ca8d8687b0933bbfb19R223) the scheduler code was refactored into separate `bind` and `assume` functions. When that happened, `bind` was called with `pod` as an argument. The argument to `bind` should be the assumed pod, not the original pod. Evidence that `assumedPod` is the correct argument bind and not `pod`: 80f26fa8a8/plugin/pkg/scheduler/scheduler.go (L229-L234)
. (and it says `assumed` in the function signature for `bind`, even though it's not called with the assumed pod as an argument).
This is an issue (and causes #49314, where pods that fail to bind to a node get stuck indefinitely) in the following scenario:
1. The pod fails to bind to the node
2. `bind` calls `ForgetPod` with the `pod` argument
3. since `ForgetPod` is expecting the assumed pod as an argument (because that's what's in the scheduler cache), it fails with an error like `scheduler cache ForgetPod failed: pod test-677550-rc-edit-namespace/nginx-jvn09 state was assumed on a different node`
4. The pod gets lost forever because of some incomplete error handling (which I haven't addressed here in the interest of making a simpler PR)
In this PR I've fixed the call to `bind` and modified the tests to make sure that `ForgetPod` gets called with the correct argument (the assumed pod) when binding fails.
**Which issue this PR fixes**: fixes #49314
**Special notes for your reviewer**:
**Release note**:
```release-note
```
This commit is contained in:
commit
898b1b3330
@ -185,14 +185,14 @@ func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
|
||||
}
|
||||
|
||||
// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
|
||||
func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
|
||||
// assume modifies `assumed`.
|
||||
func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
|
||||
// Optimistically assume that the binding will succeed and send it to apiserver
|
||||
// in the background.
|
||||
// If the binding fails, scheduler will release resources allocated to assumed pod
|
||||
// immediately.
|
||||
assumed := *pod
|
||||
assumed.Spec.NodeName = host
|
||||
if err := sched.config.SchedulerCache.AssumePod(&assumed); err != nil {
|
||||
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
|
||||
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
||||
// TODO: This means that a given pod is already in cache (which means it
|
||||
// is either assumed or already added). This is most probably result of a
|
||||
@ -207,7 +207,7 @@ func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
|
||||
// predicates in equivalence cache.
|
||||
// If the binding fails, these invalidated item will not break anything.
|
||||
if sched.config.Ecache != nil {
|
||||
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, host)
|
||||
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(assumed, host)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -264,15 +264,17 @@ func (sched *Scheduler) scheduleOne() {
|
||||
|
||||
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
|
||||
// This allows us to keep scheduling without waiting on binding to occur.
|
||||
err = sched.assume(pod, suggestedHost)
|
||||
assumedPod := *pod
|
||||
// assume modifies `assumedPod` by setting NodeName=suggestedHost
|
||||
err = sched.assume(&assumedPod, suggestedHost)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
|
||||
go func() {
|
||||
err := sched.bind(pod, &v1.Binding{
|
||||
ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name, UID: pod.UID},
|
||||
err := sched.bind(&assumedPod, &v1.Binding{
|
||||
ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
|
||||
Target: v1.ObjectReference{
|
||||
Kind: "Node",
|
||||
Name: suggestedHost,
|
||||
|
@ -115,6 +115,7 @@ func TestScheduler(t *testing.T) {
|
||||
sendPod *v1.Pod
|
||||
algo algorithm.ScheduleAlgorithm
|
||||
expectErrorPod *v1.Pod
|
||||
expectForgetPod *v1.Pod
|
||||
expectAssumedPod *v1.Pod
|
||||
expectError error
|
||||
expectBind *v1.Binding
|
||||
@ -139,7 +140,8 @@ func TestScheduler(t *testing.T) {
|
||||
expectAssumedPod: podWithID("foo", testNode.Name),
|
||||
injectBindError: errB,
|
||||
expectError: errB,
|
||||
expectErrorPod: podWithID("foo", ""),
|
||||
expectErrorPod: podWithID("foo", testNode.Name),
|
||||
expectForgetPod: podWithID("foo", testNode.Name),
|
||||
eventReason: "FailedScheduling",
|
||||
}, {
|
||||
sendPod: deletingPod("foo"),
|
||||
@ -151,11 +153,15 @@ func TestScheduler(t *testing.T) {
|
||||
for i, item := range table {
|
||||
var gotError error
|
||||
var gotPod *v1.Pod
|
||||
var gotForgetPod *v1.Pod
|
||||
var gotAssumedPod *v1.Pod
|
||||
var gotBinding *v1.Binding
|
||||
configurator := &FakeConfigurator{
|
||||
Config: &Config{
|
||||
SchedulerCache: &schedulertesting.FakeCache{
|
||||
ForgetFunc: func(pod *v1.Pod) {
|
||||
gotForgetPod = pod
|
||||
},
|
||||
AssumeFunc: func(pod *v1.Pod) {
|
||||
gotAssumedPod = pod
|
||||
},
|
||||
@ -196,6 +202,9 @@ func TestScheduler(t *testing.T) {
|
||||
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
|
||||
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
|
||||
}
|
||||
if e, a := item.expectForgetPod, gotForgetPod; !reflect.DeepEqual(e, a) {
|
||||
t.Errorf("%v: forget pod: wanted %v, got %v", i, e, a)
|
||||
}
|
||||
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
|
||||
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
// FakeCache is used for testing
|
||||
type FakeCache struct {
|
||||
AssumeFunc func(*v1.Pod)
|
||||
ForgetFunc func(*v1.Pod)
|
||||
}
|
||||
|
||||
func (f *FakeCache) AssumePod(pod *v1.Pod) error {
|
||||
@ -34,7 +35,10 @@ func (f *FakeCache) AssumePod(pod *v1.Pod) error {
|
||||
|
||||
func (f *FakeCache) FinishBinding(pod *v1.Pod) error { return nil }
|
||||
|
||||
func (f *FakeCache) ForgetPod(pod *v1.Pod) error { return nil }
|
||||
func (f *FakeCache) ForgetPod(pod *v1.Pod) error {
|
||||
f.ForgetFunc(pod)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *FakeCache) AddPod(pod *v1.Pod) error { return nil }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user