Merge pull request #106102 from alculquicondor/delete-pod-tombstone

Ensure deletion of pods in queues and cache
This commit is contained in:
Kubernetes Prow Robot 2021-11-06 10:52:51 -07:00 committed by GitHub
commit d92a443ca7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 51 deletions

View File

@ -270,8 +270,10 @@ func addAllEventHandlers(
case *v1.Pod: case *v1.Pod:
return assignedPod(t) return assignedPod(t)
case cache.DeletedFinalStateUnknown: case cache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok { if _, ok := t.Obj.(*v1.Pod); ok {
return assignedPod(pod) // The carried object may be stale, so we don't use it to check if
// it's assigned or not. Attempting to cleanup anyways.
return true
} }
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched)) utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
return false return false
@ -296,7 +298,9 @@ func addAllEventHandlers(
return !assignedPod(t) && responsibleForPod(t, sched.Profiles) return !assignedPod(t) && responsibleForPod(t, sched.Profiles)
case cache.DeletedFinalStateUnknown: case cache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok { if pod, ok := t.Obj.(*v1.Pod); ok {
return !assignedPod(pod) && responsibleForPod(pod, sched.Profiles) // The carried object may be stale, so we don't use it to check if
// it's assigned or not.
return responsibleForPod(pod, sched.Profiles)
} }
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched)) utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
return false return false

View File

@ -537,22 +537,19 @@ func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
currState, ok := cache.podStates[key] currState, ok := cache.podStates[key]
switch { switch {
// An assumed pod won't have Delete/Remove event. It needs to have Add event case ok:
// before Remove event, in which case the state would change from Assumed to Added.
case ok && !cache.assumedPods.Has(key):
if currState.pod.Spec.NodeName != pod.Spec.NodeName { if currState.pod.Spec.NodeName != pod.Spec.NodeName {
klog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName) klog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
if pod.Spec.NodeName != "" {
// An empty NodeName is possible when the scheduler misses a Delete
// event and it gets the last known state from the informer cache.
klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions") klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
} }
err := cache.removePod(currState.pod)
if err != nil {
return err
} }
delete(cache.podStates, key) return cache.expirePod(key, currState)
default: default:
return fmt.Errorf("pod %v is not found in scheduler cache, so cannot be removed from it", key) return fmt.Errorf("pod %v is not found in scheduler cache, so cannot be removed from it", key)
} }
return nil
} }
func (cache *schedulerCache) IsAssumedPod(pod *v1.Pod) (bool, error) { func (cache *schedulerCache) IsAssumedPod(pod *v1.Pod) (bool, error) {

View File

@ -807,22 +807,16 @@ func TestEphemeralStorageResource(t *testing.T) {
// TestRemovePod tests after added pod is removed, its information should also be subtracted. // TestRemovePod tests after added pod is removed, its information should also be subtracted.
func TestRemovePod(t *testing.T) { func TestRemovePod(t *testing.T) {
basePod := makeBasePod(t, "node-1", "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}) pod := makeBasePod(t, "node-1", "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
tests := []struct { nodes := []*v1.Node{
nodes []*v1.Node
pod *v1.Pod
wNodeInfo *framework.NodeInfo
}{{
nodes: []*v1.Node{
{ {
ObjectMeta: metav1.ObjectMeta{Name: "node-1"}, ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
}, },
{ {
ObjectMeta: metav1.ObjectMeta{Name: "node-2"}, ObjectMeta: metav1.ObjectMeta{Name: "node-2"},
}, },
}, }
pod: basePod, wNodeInfo := newNodeInfo(
wNodeInfo: newNodeInfo(
&framework.Resource{ &framework.Resource{
MilliCPU: 100, MilliCPU: 100,
Memory: 500, Memory: 500,
@ -831,33 +825,44 @@ func TestRemovePod(t *testing.T) {
MilliCPU: 100, MilliCPU: 100,
Memory: 500, Memory: 500,
}, },
[]*v1.Pod{basePod}, []*v1.Pod{pod},
newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(), newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
make(map[string]*framework.ImageStateSummary), make(map[string]*framework.ImageStateSummary),
), )
}} tests := map[string]struct {
assume bool
}{
"bound": {},
"assumed": {assume: true},
}
for i, tt := range tests { for name, tt := range tests {
t.Run(fmt.Sprintf("case_%d", i), func(t *testing.T) { t.Run(name, func(t *testing.T) {
nodeName := tt.pod.Spec.NodeName nodeName := pod.Spec.NodeName
cache := newSchedulerCache(time.Second, time.Second, nil) cache := newSchedulerCache(time.Second, time.Second, nil)
// Add pod succeeds even before adding the nodes. // Add/Assume pod succeeds even before adding the nodes.
if err := cache.AddPod(tt.pod); err != nil { if tt.assume {
if err := cache.AddPod(pod); err != nil {
t.Fatalf("AddPod failed: %v", err) t.Fatalf("AddPod failed: %v", err)
} }
} else {
if err := cache.AssumePod(pod); err != nil {
t.Fatalf("AssumePod failed: %v", err)
}
}
n := cache.nodes[nodeName] n := cache.nodes[nodeName]
if err := deepEqualWithoutGeneration(n, tt.wNodeInfo); err != nil { if err := deepEqualWithoutGeneration(n, wNodeInfo); err != nil {
t.Error(err) t.Error(err)
} }
for _, n := range tt.nodes { for _, n := range nodes {
cache.AddNode(n) cache.AddNode(n)
} }
if err := cache.RemovePod(tt.pod); err != nil { if err := cache.RemovePod(pod); err != nil {
t.Fatalf("RemovePod failed: %v", err) t.Fatalf("RemovePod failed: %v", err)
} }
if _, err := cache.GetPod(tt.pod); err == nil { if _, err := cache.GetPod(pod); err == nil {
t.Errorf("pod was not deleted") t.Errorf("pod was not deleted")
} }