kubelet: do not enter termination status if pod might need to unprepare resources

Ed Bartosh 2022-11-11 12:40:18 +02:00 committed by Patrick Ohly
parent ae0f38437c
commit abcb56defb
9 changed files with 81 additions and 5 deletions
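
At a high level, this change threads a new question from the kubelet's termination check down to the DRA claim cache: PodCouldHaveRunningContainers now also returns true while any prepared resource claim still references the pod, so the status manager does not move the pod into the termination status before its resources are unprepared. The following is a minimal, self-contained sketch of that call chain; the types, cache layout, and wiring are simplified stand-ins for illustration, not the actual kubelet code.

// Illustrative sketch of the call chain introduced by this commit:
// Kubelet -> ContainerManager -> DRA manager -> claim cache.
package main

import (
	"fmt"
	"sync"
)

type UID string

// claimInfoCache is a reduced model of the DRA claim bookkeeping: each
// prepared claim remembers the UIDs of the pods that still reference it.
type claimInfoCache struct {
	sync.RWMutex
	podUIDsByClaim map[string]map[UID]struct{}
}

// hasPodReference reports whether any prepared claim still references the pod.
func (c *claimInfoCache) hasPodReference(uid UID) bool {
	c.RLock()
	defer c.RUnlock()
	for _, pods := range c.podUIDsByClaim {
		if _, ok := pods[uid]; ok {
			return true
		}
	}
	return false
}

type draManager struct{ cache *claimInfoCache }

func (m *draManager) PodMightNeedToUnprepareResources(uid UID) bool {
	return m.cache.hasPodReference(uid)
}

type containerManager struct{ dra *draManager }

func (cm *containerManager) PodMightNeedToUnprepareResources(uid UID) bool {
	if cm.dra != nil {
		return cm.dra.PodMightNeedToUnprepareResources(uid)
	}
	return false
}

// podCouldHaveRunningContainers mirrors the kubelet change: a pod is treated
// as "not yet safe to terminate" while a claim still references it.
func podCouldHaveRunningContainers(couldBeRunning bool, cm *containerManager, uid UID) bool {
	if couldBeRunning {
		return true
	}
	return cm.PodMightNeedToUnprepareResources(uid)
}

func main() {
	cache := &claimInfoCache{podUIDsByClaim: map[string]map[UID]struct{}{
		"claim-a/ns1": {"pod-1": {}},
	}}
	cm := &containerManager{dra: &draManager{cache: cache}}
	fmt.Println(podCouldHaveRunningContainers(false, cm, "pod-1")) // true: a claim still references the pod
	fmt.Println(podCouldHaveRunningContainers(false, cm, "pod-2")) // false: nothing left to unprepare
}

The real implementation spreads these pieces across ContainerManager, the DRA Manager, and claimInfoCache, as the file diffs below show.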

View File

@@ -22,7 +22,9 @@ import (
"strings"
"time"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
// TODO: Migrate kubelet to either use its own internal objects or client library.
v1 "k8s.io/api/core/v1"
internalapi "k8s.io/cri-api/pkg/apis"
@@ -122,6 +124,10 @@ type ContainerManager interface {
// UnprepareResources unprepares pod resources
UnprepareResources(*v1.Pod) error
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
// might need to unprepare resources.
PodMightNeedToUnprepareResources(UID types.UID) bool
// Implements the podresources Provider API for CPUs, Memory and Devices
podresources.CPUsProvider
podresources.DevicesProvider

View File

@@ -39,6 +39,7 @@ import (
libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
@@ -1038,3 +1039,11 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont
func (cm *containerManagerImpl) UnprepareResources(pod *v1.Pod) error {
return cm.draManager.UnprepareResources(pod)
}
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
if cm.draManager != nil {
return cm.draManager.PodMightNeedToUnprepareResources(UID)
}
return false
}

View File

@@ -21,6 +21,7 @@ import (
"k8s.io/klog/v2"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
internalapi "k8s.io/cri-api/pkg/apis"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
@@ -163,6 +164,10 @@ func (cm *containerManagerStub) UnprepareResources(*v1.Pod) error {
return nil
}
func (cm *containerManagerStub) PodMightNeedToUnprepareResources(UID types.UID) bool {
return false
}
func NewStubContainerManager() ContainerManager {
return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
}

View File

@@ -30,6 +30,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/record"
internalapi "k8s.io/cri-api/pkg/apis"
@@ -260,3 +261,7 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont
func (cm *containerManagerImpl) UnprepareResources(*v1.Pod) error {
return nil
}
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
return false
}

View File

@@ -108,3 +108,20 @@ func (cache *claimInfoCache) delete(claimName, namespace string) {
delete(cache.claimInfo, claimName+namespace)
}
// hasPodReference checks if there is at least one claim
// that is referenced by the pod with the given UID.
// This function is used indirectly by the status manager
// to check if the pod can enter the termination status.
func (cache *claimInfoCache) hasPodReference(UID types.UID) bool {
cache.RLock()
defer cache.RUnlock()
for _, claimInfo := range cache.claimInfo {
if claimInfo.podUIDs.Has(string(UID)) {
return true
}
}
return false
}

View File

@@ -207,11 +207,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
continue
}
// Delete pod UID from the cache
claimInfo.deletePodReference(pod.UID)
// Skip calling NodeUnprepareResource if other pods are still referencing it
if len(claimInfo.podUIDs) > 0 {
if len(claimInfo.podUIDs) > 1 {
claimInfo.deletePodReference(pod.UID)
continue
}
@@ -236,6 +234,12 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
claimInfo.cdiDevices, err)
}
// Delete the last pod UID only if the NodeUnprepareResource call succeeds.
// This ensures that the status manager doesn't let the pod enter the
// termination status prematurely. This logic is implemented in
// m.PodMightNeedToUnprepareResources and in claimInfo.hasPodReference
// (a simplified sketch of this ordering follows below).
claimInfo.deletePodReference(pod.UID)
klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response)
// delete resource from the cache
m.cache.delete(claimInfo.claimName, pod.Namespace)
@@ -243,3 +247,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
return nil
}
// PodMightNeedToUnprepareResources returns true if the pod might need to
// unprepare resources
func (m *ManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
return m.cache.hasPodReference(UID)
}
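
The reordering above is the core of the fix: the last pod reference is removed only after the plugin's NodeUnprepareResource call succeeds, so a pod whose unprepare failed keeps its claim reference and hasPodReference / PodMightNeedToUnprepareResources keep returning true. A minimal sketch of that ordering follows; the types are simplified and nodeUnprepare is a stand-in callback for the real gRPC call, not the actual DRA plugin API.

// Illustrative sketch of the unprepare ordering, assuming simplified types.
package main

import (
	"errors"
	"fmt"
)

type claimInfo struct {
	podUIDs map[string]struct{}
}

// unprepare mirrors the ordering in ManagerImpl.UnprepareResources:
// if other pods still reference the claim, only this pod's reference is
// dropped; for the last reference, the plugin is called first and the
// reference is dropped only on success.
func unprepare(ci *claimInfo, podUID string, nodeUnprepare func() error) error {
	if len(ci.podUIDs) > 1 {
		delete(ci.podUIDs, podUID)
		return nil
	}
	if err := nodeUnprepare(); err != nil {
		// Keep the reference: hasPodReference stays true, so the pod is not
		// allowed to enter the termination status yet and unprepare can be retried.
		return err
	}
	delete(ci.podUIDs, podUID)
	return nil
}

func main() {
	ci := &claimInfo{podUIDs: map[string]struct{}{"pod-1": {}}}

	failing := func() error { return errors.New("plugin unavailable") }
	if err := unprepare(ci, "pod-1", failing); err != nil {
		fmt.Println("unprepare failed:", err)
	}
	_, stillReferenced := ci.podUIDs["pod-1"]
	fmt.Println("pod still referenced:", stillReferenced) // true: pod must not terminate yet

	working := func() error { return nil }
	_ = unprepare(ci, "pod-1", working)
	fmt.Println("references left:", len(ci.podUIDs)) // 0
}

With the previous order (delete the reference first, then call the plugin), a failed NodeUnprepareResource call would have left no reference behind, and the pod could have been marked terminated with its resources still prepared.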

View File

@@ -18,6 +18,7 @@ package dra
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@@ -30,6 +31,10 @@ type Manager interface {
// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
UnprepareResources(pod *v1.Pod) error
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
// might need to unprepare resources.
PodMightNeedToUnprepareResources(UID types.UID) bool
}
// ContainerInfo contains information required by the runtime to consume prepared resources.

View File

@@ -22,6 +22,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
internalapi "k8s.io/cri-api/pkg/apis"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
@@ -245,3 +246,7 @@ func (cm *FakeContainerManager) PrepareResources(pod *v1.Pod, container *v1.Cont
func (cm *FakeContainerManager) UnprepareResources(*v1.Pod) error {
return nil
}
func (cm *FakeContainerManager) PodMightNeedToUnprepareResources(UID types.UID) bool {
return false
}

View File

@@ -927,7 +927,21 @@ func countRunningContainerStatus(status v1.PodStatus) int {
// PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running
// containers. This returns false if the pod has not yet been started or the pod is unknown.
func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool {
return kl.podWorkers.CouldHaveRunningContainers(pod.UID)
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
return true
}
// Check if the pod might need to unprepare resources before termination.
// NOTE: This is a temporary solution. This call is here to avoid changing
// the status manager and its tests.
// TODO: extend the PodDeletionSafetyProvider interface and implement it
// in a separate Kubelet method (a hypothetical sketch follows this function).
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
if kl.containerManager.PodMightNeedToUnprepareResources(pod.UID) {
return true
}
}
return false
}
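
The TODO above points at a cleaner long-term shape: extend the status manager's PodDeletionSafetyProvider instead of piggybacking on PodCouldHaveRunningContainers. Purely as a hypothetical illustration of that direction; the extra interface method, its name, and the stub wiring below are assumptions, not part of this commit or of the actual kubelet API.

// Hypothetical sketch: a dedicated "might need to unprepare" check queried by
// a status-manager-like caller, instead of folding it into the running check.
package main

import "fmt"

type UID string

// podDeletionSafetyProvider is a reduced stand-in for the status manager's
// provider interface, with the hypothetical extra method added.
type podDeletionSafetyProvider interface {
	PodCouldHaveRunningContainers(uid UID) bool
	PodMightNeedToUnprepareResources(uid UID) bool // hypothetical addition
}

// kubeletStub models a Kubelet answering the two checks independently.
type kubeletStub struct {
	couldBeRunning  map[UID]bool
	claimReferenced map[UID]bool // pods still referenced by prepared DRA claims
}

func (k *kubeletStub) PodCouldHaveRunningContainers(uid UID) bool {
	return k.couldBeRunning[uid]
}

func (k *kubeletStub) PodMightNeedToUnprepareResources(uid UID) bool {
	return k.claimReferenced[uid]
}

// canEnterTerminationStatus shows how a status-manager-like caller could
// combine the two checks once the interface is split.
func canEnterTerminationStatus(p podDeletionSafetyProvider, uid UID) bool {
	return !p.PodCouldHaveRunningContainers(uid) && !p.PodMightNeedToUnprepareResources(uid)
}

func main() {
	kl := &kubeletStub{
		couldBeRunning:  map[UID]bool{},
		claimReferenced: map[UID]bool{"pod-1": true},
	}
	fmt.Println(canEnterTerminationStatus(kl, "pod-1")) // false: a claim still references the pod
	fmt.Println(canEnterTerminationStatus(kl, "pod-2")) // true
}
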
// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have