Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-22 19:31:44 +00:00
Merge pull request #108831 from waynepeking348/skip_re_allocate_logic_if_pod_id_already_removed

skip re-allocate logic if pod is already removed to avoid panic

Commit dbd37cb8a8
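Why the change is needed, in brief: after a node restart, GetDeviceRunContainerOptions may find a device-plugin resource with no cached state and re-issue an Allocate request; if the pod has meanwhile been removed from the kubelet's activePods list, that re-allocation could panic. The commit adds a checkPodActive helper to the device manager and skips re-allocation for such pods. The snippet below is a minimal, self-contained sketch of the same UID-based membership check, using a hypothetical Pod type rather than the real v1.Pod:

package main

import "fmt"

// Pod is a hypothetical stand-in for v1.Pod, reduced to what the check uses.
type Pod struct {
	UID  string
	Name string
}

// isPodActive mirrors the shape of the new checkPodActive helper:
// it reports whether pod is still present in the active-pods list, matching by UID.
func isPodActive(activePods []*Pod, pod *Pod) bool {
	for _, activePod := range activePods {
		if activePod.UID == pod.UID {
			return true
		}
	}
	return false
}

func main() {
	active := []*Pod{{UID: "uid-1", Name: "pod1"}}
	removed := &Pod{UID: "uid-2", Name: "pod2"}

	// A pod that is no longer active is skipped instead of re-allocated,
	// which is the case that previously led to a panic.
	if !isPodActive(active, removed) {
		fmt.Println("pod deleted from activePods, skip re-allocate")
	}
}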
@@ -997,6 +997,18 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
 	return nil
 }
 
+// checkPodActive checks if the given pod is still in activePods list
+func (m *ManagerImpl) checkPodActive(pod *v1.Pod) bool {
+	activePods := m.activePods()
+	for _, activePod := range activePods {
+		if activePod.UID == pod.UID {
+			return true
+		}
+	}
+
+	return false
+}
+
 // GetDeviceRunContainerOptions checks whether we have cached containerDevices
 // for the passed-in <pod, container> and returns its DeviceRunContainerOptions
 // for the found one. An empty struct is returned in case no cached state is found.
@@ -1013,6 +1025,12 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
 		if err != nil {
 			return nil, err
 		}
+
+		if !m.checkPodActive(pod) {
+			klog.ErrorS(nil, "pod deleted from activePods, skip to reAllocate", "podUID", podUID)
+			continue
+		}
+
 		// This is a device plugin resource yet we don't have cached
 		// resource state. This is likely due to a race during node
 		// restart. We re-issue allocate request to cover this race.
@@ -959,6 +959,69 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
 
 }
 
+func TestGetDeviceRunContainerOptions(t *testing.T) {
+	res1 := TestResource{
+		resourceName:     "domain1.com/resource1",
+		resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev1", "dev2"}},
+		topology:         true,
+	}
+	res2 := TestResource{
+		resourceName:     "domain2.com/resource2",
+		resourceQuantity: *resource.NewQuantity(int64(1), resource.DecimalSI),
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev3", "dev4"}},
+		topology:         false,
+	}
+
+	testResources := make([]TestResource, 0, 2)
+	testResources = append(testResources, res1)
+	testResources = append(testResources, res2)
+
+	podsStub := activePodsStub{
+		activePods: []*v1.Pod{},
+	}
+	as := require.New(t)
+
+	tmpDir, err := ioutil.TempDir("", "checkpoint")
+	as.Nil(err)
+	defer os.RemoveAll(tmpDir)
+
+	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
+	as.Nil(err)
+
+	pod1 := makePod(v1.ResourceList{
+		v1.ResourceName(res1.resourceName): res1.resourceQuantity,
+		v1.ResourceName(res2.resourceName): res2.resourceQuantity,
+	})
+	pod2 := makePod(v1.ResourceList{
+		v1.ResourceName(res2.resourceName): res2.resourceQuantity,
+	})
+
+	activePods := []*v1.Pod{pod1, pod2}
+	podsStub.updateActivePods(activePods)
+
+	err = testManager.Allocate(pod1, &pod1.Spec.Containers[0])
+	as.Nil(err)
+	err = testManager.Allocate(pod2, &pod2.Spec.Containers[0])
+	as.Nil(err)
+
+	// when pod is in activePods, GetDeviceRunContainerOptions should return the allocated options
+	runContainerOpts, err := testManager.GetDeviceRunContainerOptions(pod1, &pod1.Spec.Containers[0])
+	as.Nil(err)
+	as.Equal(len(runContainerOpts.Devices), 3)
+	as.Equal(len(runContainerOpts.Mounts), 2)
+	as.Equal(len(runContainerOpts.Envs), 2)
+
+	activePods = []*v1.Pod{pod2}
+	podsStub.updateActivePods(activePods)
+	testManager.UpdateAllocatedDevices()
+
+	// when pod is removed from activePods, GetDeviceRunContainerOptions should return nil options without error
+	runContainerOpts, err = testManager.GetDeviceRunContainerOptions(pod1, &pod1.Spec.Containers[0])
+	as.Nil(err)
+	as.Nil(runContainerOpts)
+}
+
 func TestInitContainerDeviceAllocation(t *testing.T) {
 	// Requesting to create a pod that requests resourceName1 in init containers and normal containers
 	// should succeed with devices allocated to init containers reallocated to normal containers.
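For callers, the behaviour the new test pins down is that a pod removed from activePods yields nil options and a nil error rather than a failure. The sketch below shows how a caller might treat that result; DeviceRunContainerOptions and getOptions are simplified stand-ins for illustration, not the real kubelet types:

package main

import "fmt"

// DeviceRunContainerOptions is a simplified stand-in; only the fields the
// test above inspects are included.
type DeviceRunContainerOptions struct {
	Devices []string
	Mounts  []string
	Envs    []string
}

// getOptions imitates the behaviour the test asserts: for a pod that is no
// longer active, the manager skips re-allocation and returns nil options
// with a nil error rather than failing.
func getOptions(podActive bool) (*DeviceRunContainerOptions, error) {
	if !podActive {
		return nil, nil
	}
	return &DeviceRunContainerOptions{Devices: []string{"dev1", "dev2", "dev3"}}, nil
}

func main() {
	opts, err := getOptions(false)
	if err != nil {
		fmt.Println("unexpected error:", err)
		return
	}
	if opts == nil {
		// Mirrors the test's final assertions: as.Nil(err) and as.Nil(runContainerOpts).
		fmt.Println("pod removed from activePods: no device options, no error")
	}
}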