Merge pull request #108831 from waynepeking348/skip_re_allocate_logic_if_pod_id_already_removed
skip re-allocate logic if pod is already removed to avoid panic
Commit dbd37cb8a8
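The diff below adds a checkPodActive helper to the kubelet device manager and uses it to guard the re-allocation path in GetDeviceRunContainerOptions: if the pod has already been removed from activePods, the manager skips re-issuing an Allocate request against stale state instead of panicking. What follows is a minimal, self-contained sketch of that guard pattern; the pod and manager types are simplified stand-ins for illustration, not the kubelet's real types.

package main

import "fmt"

// pod is a simplified stand-in for *v1.Pod; only the UID matters here.
type pod struct {
	UID  string
	Name string
}

// manager is a simplified stand-in for the device manager. The activePods
// field mirrors ManagerImpl.activePods: a callback returning the kubelet's
// current list of active pods.
type manager struct {
	activePods func() []*pod
}

// checkPodActive mirrors the helper added by this PR: it reports whether
// the given pod is still present in the activePods list.
func (m *manager) checkPodActive(p *pod) bool {
	for _, active := range m.activePods() {
		if active.UID == p.UID {
			return true
		}
	}
	return false
}

// reAllocateIfNeeded sketches the guarded path in GetDeviceRunContainerOptions:
// skip re-allocation for pods that are no longer active instead of operating
// on state that has already been cleaned up.
func (m *manager) reAllocateIfNeeded(p *pod, resource string) {
	if !m.checkPodActive(p) {
		fmt.Printf("pod %s deleted from activePods, skipping re-allocate for %s\n", p.UID, resource)
		return
	}
	fmt.Printf("re-issuing allocate for pod %s, resource %s\n", p.UID, resource)
}

func main() {
	removed := &pod{UID: "uid-1", Name: "removed-pod"}
	running := &pod{UID: "uid-2", Name: "running-pod"}

	m := &manager{activePods: func() []*pod { return []*pod{running} }}

	m.reAllocateIfNeeded(removed, "domain1.com/resource1") // skipped: pod no longer active
	m.reAllocateIfNeeded(running, "domain1.com/resource1") // proceeds
}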
@@ -997,6 +997,18 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
 	return nil
 }
 
+// checkPodActive checks if the given pod is still in activePods list
+func (m *ManagerImpl) checkPodActive(pod *v1.Pod) bool {
+	activePods := m.activePods()
+	for _, activePod := range activePods {
+		if activePod.UID == pod.UID {
+			return true
+		}
+	}
+
+	return false
+}
+
 // GetDeviceRunContainerOptions checks whether we have cached containerDevices
 // for the passed-in <pod, container> and returns its DeviceRunContainerOptions
 // for the found one. An empty struct is returned in case no cached state is found.
@@ -1013,6 +1025,12 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
 		if err != nil {
 			return nil, err
 		}
+
+		if !m.checkPodActive(pod) {
+			klog.ErrorS(nil, "pod deleted from activePods, skip to reAllocate", "podUID", podUID)
+			continue
+		}
+
 		// This is a device plugin resource yet we don't have cached
 		// resource state. This is likely due to a race during node
 		// restart. We re-issue allocate request to cover this race.
@@ -959,6 +959,69 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
 	}
 }
 
+func TestGetDeviceRunContainerOptions(t *testing.T) {
+	res1 := TestResource{
+		resourceName:     "domain1.com/resource1",
+		resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev1", "dev2"}},
+		topology:         true,
+	}
+	res2 := TestResource{
+		resourceName:     "domain2.com/resource2",
+		resourceQuantity: *resource.NewQuantity(int64(1), resource.DecimalSI),
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev3", "dev4"}},
+		topology:         false,
+	}
+
+	testResources := make([]TestResource, 2)
+	testResources = append(testResources, res1)
+	testResources = append(testResources, res2)
+
+	podsStub := activePodsStub{
+		activePods: []*v1.Pod{},
+	}
+	as := require.New(t)
+
+	tmpDir, err := ioutil.TempDir("", "checkpoint")
+	as.Nil(err)
+	defer os.RemoveAll(tmpDir)
+
+	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
+	as.Nil(err)
+
+	pod1 := makePod(v1.ResourceList{
+		v1.ResourceName(res1.resourceName): res1.resourceQuantity,
+		v1.ResourceName(res2.resourceName): res2.resourceQuantity,
+	})
+	pod2 := makePod(v1.ResourceList{
+		v1.ResourceName(res2.resourceName): res2.resourceQuantity,
+	})
+
+	activePods := []*v1.Pod{pod1, pod2}
+	podsStub.updateActivePods(activePods)
+
+	err = testManager.Allocate(pod1, &pod1.Spec.Containers[0])
+	as.Nil(err)
+	err = testManager.Allocate(pod2, &pod2.Spec.Containers[0])
+	as.Nil(err)
+
+	// while pod1 is in activePods, GetDeviceRunContainerOptions should return the allocated options
+	runContainerOpts, err := testManager.GetDeviceRunContainerOptions(pod1, &pod1.Spec.Containers[0])
+	as.Nil(err)
+	as.Equal(len(runContainerOpts.Devices), 3)
+	as.Equal(len(runContainerOpts.Mounts), 2)
+	as.Equal(len(runContainerOpts.Envs), 2)
+
+	activePods = []*v1.Pod{pod2}
+	podsStub.updateActivePods(activePods)
+	testManager.UpdateAllocatedDevices()
+
+	// once pod1 is removed from activePods, GetDeviceRunContainerOptions should skip re-allocation and return nil options without an error
+	runContainerOpts, err = testManager.GetDeviceRunContainerOptions(pod1, &pod1.Spec.Containers[0])
+	as.Nil(err)
+	as.Nil(runContainerOpts)
+}
+
 func TestInitContainerDeviceAllocation(t *testing.T) {
 	// Requesting to create a pod that requests resourceName1 in init containers and normal containers
 	// should succeed with devices allocated to init containers reallocated to normal containers.
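The new test relies on helpers defined elsewhere in the device manager test file (activePodsStub, getTestManager, makePod); their definitions are not part of this diff. As a rough sketch of the stub's shape, assumed here rather than quoted from the test file, an active-pods stub only needs to hand the manager a callback and let the test swap the pod list mid-test:

package devicemanager

import v1 "k8s.io/api/core/v1"

// activePodsStub (sketch, assumed shape): a test double whose getActivePods
// method is passed to the manager as its activePods callback. Swapping the
// slice via updateActivePods simulates a pod being deleted between Allocate
// and GetDeviceRunContainerOptions, which is the scenario exercised above.
type activePodsStub struct {
	activePods []*v1.Pod
}

func (a *activePodsStub) getActivePods() []*v1.Pod {
	return a.activePods
}

func (a *activePodsStub) updateActivePods(newPods []*v1.Pod) {
	a.activePods = newPods
}

In the test, podsStub.getActivePods is handed to getTestManager, and updateActivePods([]*v1.Pod{pod2}) drops pod1 before UpdateAllocatedDevices runs, so the final GetDeviceRunContainerOptions call hits the new checkPodActive guard and returns nil options without an error.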