mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 15:05:27 +00:00
kubelet: do not enter termination status if pod might need to unprepare resources
This commit is contained in:
parent
ae0f38437c
commit
abcb56defb
@ -22,7 +22,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
|
||||
// TODO: Migrate kubelet to either use its own internal objects or client library.
|
||||
v1 "k8s.io/api/core/v1"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
@ -122,6 +124,10 @@ type ContainerManager interface {
|
||||
// UnprepareResources unprepares pod resources
|
||||
UnprepareResources(*v1.Pod) error
|
||||
|
||||
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
|
||||
// might need to unprepare resources.
|
||||
PodMightNeedToUnprepareResources(UID types.UID) bool
|
||||
|
||||
// Implements the podresources Provider API for CPUs, Memory and Devices
|
||||
podresources.CPUsProvider
|
||||
podresources.DevicesProvider
|
||||
|
@ -39,6 +39,7 @@ import (
|
||||
libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
@ -1038,3 +1039,11 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont
|
||||
// UnprepareResources unprepares the pod's dynamically allocated resources by
// delegating to the DRA (Dynamic Resource Allocation) manager.
// NOTE(review): unlike PodMightNeedToUnprepareResources, this does not
// nil-check cm.draManager — presumably callers only invoke it when the
// DynamicResourceAllocation feature gate is enabled; confirm.
func (cm *containerManagerImpl) UnprepareResources(pod *v1.Pod) error {
	return cm.draManager.UnprepareResources(pod)
}
|
||||
|
||||
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
// might still need to unprepare DRA resources before it can be considered
// terminated. It returns false when no DRA manager is configured (e.g. the
// DynamicResourceAllocation feature is disabled).
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
	if cm.draManager != nil {
		return cm.draManager.PodMightNeedToUnprepareResources(UID)
	}

	return false
}
|
||||
|
@ -21,6 +21,7 @@ import (
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
@ -163,6 +164,10 @@ func (cm *containerManagerStub) UnprepareResources(*v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// PodMightNeedToUnprepareResources always returns false: the stub container
// manager does not track prepared resources, so no pod ever needs unpreparing.
func (cm *containerManagerStub) PodMightNeedToUnprepareResources(UID types.UID) bool {
	return false
}
|
||||
|
||||
// NewStubContainerManager returns a no-op ContainerManager, useful for tests
// and environments without real node-level resource management.
func NewStubContainerManager() ContainerManager {
	return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
}
|
||||
|
@ -30,6 +30,7 @@ import (
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/tools/record"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
@ -260,3 +261,7 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont
|
||||
// UnprepareResources is a no-op on this platform implementation: dynamic
// resource allocation is not supported here, so there is nothing to
// unprepare and no error to report.
func (cm *containerManagerImpl) UnprepareResources(*v1.Pod) error {
	return nil
}
|
||||
|
||||
// PodMightNeedToUnprepareResources always returns false on this platform
// implementation: without DRA support no pod can hold prepared resources.
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
	return false
}
|
||||
|
@ -108,3 +108,20 @@ func (cache *claimInfoCache) delete(claimName, namespace string) {
|
||||
|
||||
delete(cache.claimInfo, claimName+namespace)
|
||||
}
|
||||
|
||||
// hasPodReference reports whether at least one cached resource claim is still
// referenced by the pod with the given UID.
// It is used (indirectly, via PodMightNeedToUnprepareResources) by the status
// manager to decide whether a pod may enter the terminated phase: as long as
// a claim references the pod, the pod may still need to unprepare resources.
func (cache *claimInfoCache) hasPodReference(UID types.UID) bool {
	// Read lock: iteration only, no mutation of the cache.
	cache.RLock()
	defer cache.RUnlock()

	for _, claimInfo := range cache.claimInfo {
		if claimInfo.podUIDs.Has(string(UID)) {
			return true
		}
	}

	return false
}
|
||||
|
@ -207,11 +207,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete pod UID from the cache
|
||||
claimInfo.deletePodReference(pod.UID)
|
||||
|
||||
// Skip calling NodeUnprepareResource if other pods are still referencing it
|
||||
if len(claimInfo.podUIDs) > 0 {
|
||||
if len(claimInfo.podUIDs) > 1 {
|
||||
claimInfo.deletePodReference(pod.UID)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -236,6 +234,12 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
||||
claimInfo.cdiDevices, err)
|
||||
}
|
||||
|
||||
// Delete last pod UID only if NodeUnprepareResource call succeeds.
|
||||
// This ensures that status manager doesn't enter termination status
|
||||
// for the pod. This logic is implemented in the m.PodMightNeedToUnprepareResources
|
||||
// and in the claimInfo.hasPodReference.
|
||||
claimInfo.deletePodReference(pod.UID)
|
||||
|
||||
klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response)
|
||||
// delete resource from the cache
|
||||
m.cache.delete(claimInfo.claimName, pod.Namespace)
|
||||
@ -243,3 +247,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
// still has at least one prepared resource claim in the cache, i.e. it might
// need NodeUnprepareResource calls before it can be considered terminated.
func (m *ManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
	return m.cache.hasPodReference(UID)
}
|
||||
|
@ -18,6 +18,7 @@ package dra
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
@ -30,6 +31,10 @@ type Manager interface {
|
||||
|
||||
// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
|
||||
UnprepareResources(pod *v1.Pod) error
|
||||
|
||||
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
|
||||
// might need to unprepare resources.
|
||||
PodMightNeedToUnprepareResources(UID types.UID) bool
|
||||
}
|
||||
|
||||
// ContainerInfo contains information required by the runtime to consume prepared resources.
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
@ -245,3 +246,7 @@ func (cm *FakeContainerManager) PrepareResources(pod *v1.Pod, container *v1.Cont
|
||||
// UnprepareResources is a no-op for the fake container manager used in tests;
// it always succeeds.
func (cm *FakeContainerManager) UnprepareResources(*v1.Pod) error {
	return nil
}
|
||||
|
||||
// PodMightNeedToUnprepareResources always returns false for the fake
// container manager: it never prepares resources, so none need unpreparing.
func (cm *FakeContainerManager) PodMightNeedToUnprepareResources(UID types.UID) bool {
	return false
}
|
||||
|
@ -927,7 +927,21 @@ func countRunningContainerStatus(status v1.PodStatus) int {
|
||||
// PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running
|
||||
// containers. This returns false if the pod has not yet been started or the pod is unknown.
|
||||
func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool {
|
||||
return kl.podWorkers.CouldHaveRunningContainers(pod.UID)
|
||||
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check if pod might need to unprepare resources before termination
|
||||
// NOTE: This is a temporary solution. This call is here to avoid changing
|
||||
// status manager and its tests.
|
||||
// TODO: extend PodDeletionSafetyProvider interface and implement it
|
||||
// in a separate Kubelet method.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||
if kl.containerManager.PodMightNeedToUnprepareResources(pod.UID) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have
|
||||
|
Loading…
Reference in New Issue
Block a user