DRA device eviction: fix eviction triggered by pod scheduling

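Normally the scheduler shouldn't schedule a pod when there is a taint, but
perhaps it didn't know about the taint yet. Because pod updates were only
re-checked when the pod's claim statuses changed, a pod that merely got
scheduled was not considered for eviction.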

The TestEviction/update test covered this, but only failed under the right
timing conditions. The new event handler test case covers it reliably.
This commit is contained in:
Patrick Ohly 2025-03-20 18:15:58 +01:00
parent 5856d3ee6f
commit 56adcd06f3
2 changed files with 23 additions and 1 deletions

View File

@@ -794,12 +794,13 @@ func (tc *Controller) handlePodChange(oldPod, newPod *v1.Pod) {
 	// Pods get updated quite frequently. There's no need
 	// to check them again unless something changed regarding
-	// their claims.
+	// their claims or they got scheduled.
 	//
 	// In particular this prevents adding the pod again
 	// directly after the eviction condition got added
 	// to it.
 	if oldPod != nil &&
+		oldPod.Spec.NodeName == newPod.Spec.NodeName &&
 		apiequality.Semantic.DeepEqual(oldPod.Status.ResourceClaimStatuses, newPod.Status.ResourceClaimStatuses) {
 		return
 	}

View File

@@ -318,6 +318,10 @@ var (
 		OwnerReference(podName, podUID+"-other", podKind).
 		UID("other").
 		Obj()
+	unscheduledPodWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
+		UID(podUID).
+		PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
+		Obj()
 	podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
 		UID(podUID).
 		PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
@@ -494,6 +498,23 @@ func TestHandlers(t *testing.T) {
 			// At the moment, the code reliably cancels right away.
 			wantEvents: []*v1.Event{cancelPodEviction},
 		},
+		"evict-pod-after-scheduling": {
+			initialState: state{
+				pods: []*v1.Pod{unscheduledPodWithClaimName},
+				slices: []*resourceapi.ResourceSlice{sliceTainted, slice2},
+				allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim, evictionTime: &taintTime}},
+			},
+			events: []any{
+				// Normally the scheduler shouldn't schedule when there is a taint,
+				// but perhaps it didn't know yet.
+				update(unscheduledPodWithClaimName, podWithClaimName),
+			},
+			finalState: state{
+				slices: []*resourceapi.ResourceSlice{sliceTainted, slice2},
+				allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim, evictionTime: &taintTime}},
+				evicting: []evictAt{{newObject(podWithClaimName), taintTime.Time}},
+			},
+		},
 		"evict-pod-resourceclaim-unrelated-changes": {
 			initialState: state{
 				pods: []*v1.Pod{podWithClaimName},