skip reallocate logic if pod is already removed

This commit is contained in:
waynepeking348 2022-03-20 20:45:41 +08:00 committed by shaowei.wayne
parent 475f7af1c1
commit 35a456b0c6
2 changed files with 59 additions and 0 deletions

View File

@ -997,11 +997,28 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
return nil
}
// checkPodActive reports whether pod appears in the list returned by
// m.activePods(). Pods are matched by UID only.
func (m *ManagerImpl) checkPodActive(pod *v1.Pod) bool {
	current := m.activePods()
	for i := range current {
		if current[i].UID != pod.UID {
			continue
		}
		return true
	}
	return false
}
// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error) {
podUID := string(pod.UID)
if !m.checkPodActive(pod) {
klog.Warningf("pod %s has been deleted from activePods, skip getting device run options", podUID)
return nil, fmt.Errorf("pod %v is removed from activePods list", podUID)
}
contName := container.Name
needsReAllocate := false
for k, v := range container.Resources.Limits {

View File

@ -959,6 +959,48 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
}
func TestGetDeviceRunContainerOptions(t *testing.T) {
res := TestResource{
resourceName: "domain1.com/resource1",
resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
devs: checkpoint.DevicesPerNUMA{0: []string{"dev1", "dev2"}},
topology: true,
}
testResources := []TestResource{res}
podsStub := activePodsStub{
activePods: []*v1.Pod{},
}
as := require.New(t)
tmpDir, err := ioutil.TempDir("", "checkpoint")
as.Nil(err)
defer os.RemoveAll(tmpDir)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
as.Nil(err)
pod := makePod(v1.ResourceList{v1.ResourceName(res.resourceName): res.resourceQuantity})
activePods := []*v1.Pod{pod}
podsStub.updateActivePods(activePods)
err = testManager.Allocate(pod, &pod.Spec.Containers[0])
as.Nil(err)
// when pod is in activePods, GetDeviceRunContainerOptions should return
_, err = testManager.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])
as.Nil(err)
activePods = []*v1.Pod{}
podsStub.updateActivePods(activePods)
// when pod is removed from activePods,G etDeviceRunContainerOptions should return error
_, err = testManager.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])
expectedErr := fmt.Errorf("pod %v is removed from activePods list", pod.UID)
as.NotNil(err)
if !reflect.DeepEqual(err, expectedErr) {
t.Errorf("GetDeviceRunContainerOptions. expected error: %v but got: %v", expectedErr, err)
}
}
func TestInitContainerDeviceAllocation(t *testing.T) {
// Requesting to create a pod that requests resourceName1 in init containers and normal containers
// should succeed with devices allocated to init containers reallocated to normal containers.