kubelet: DRA: update locking/checkpoint semantics of the claimInfo cache

Signed-off-by: Kevin Klues <kklues@nvidia.com>
Kevin Klues 2024-04-30 11:09:19 +00:00
parent bac1a0ce1b
commit a8931c6c25
6 changed files with 425 additions and 232 deletions
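At a high level, this commit drops the sync.RWMutex embedded in each ClaimInfo and instead guards the whole claimInfoCache with a single lock, exposed to the manager through withLock/withRLock closure helpers; checkpoint syncs now happen inside those same critical sections. Below is a minimal, self-contained sketch of that pattern, using simplified stand-in types rather than the actual kubelet code:

// Illustrative sketch only (not part of the diff): the per-entry methods do
// no locking of their own; the cache owns a single RWMutex and callers group
// reads/writes (and the checkpoint sync) inside one withLock/withRLock call.
package main

import (
	"fmt"
	"sync"
)

// entry is a stand-in for ClaimInfo.
type entry struct {
	name     string
	prepared bool
}

// cache is a stand-in for claimInfoCache: one RWMutex guards the whole map.
type cache struct {
	sync.RWMutex
	entries map[string]*entry
}

// withLock runs f while holding the write lock.
func (c *cache) withLock(f func() error) error {
	c.Lock()
	defer c.Unlock()
	return f()
}

// withRLock runs f while holding the read lock.
func (c *cache) withRLock(f func() error) error {
	c.RLock()
	defer c.RUnlock()
	return f()
}

// get and add assume the caller already holds the cache lock.
func (c *cache) get(name string) (*entry, bool) {
	e, ok := c.entries[name]
	return e, ok
}

func (c *cache) add(e *entry) *entry {
	c.entries[e.name] = e
	return e
}

// syncToCheckpoint stands in for persisting the cache contents to disk.
func (c *cache) syncToCheckpoint() error { return nil }

func main() {
	c := &cache{entries: make(map[string]*entry)}

	// Mutation and checkpointing happen atomically under one lock acquisition.
	if err := c.withLock(func() error {
		e, ok := c.get("claim-1")
		if !ok {
			e = c.add(&entry{name: "claim-1"})
		}
		e.prepared = true
		return c.syncToCheckpoint()
	}); err != nil {
		fmt.Println("locked cache operation:", err)
	}

	// Reads take the shared lock.
	_ = c.withRLock(func() error {
		if e, ok := c.get("claim-1"); ok {
			fmt.Println("prepared:", e.prepared)
		}
		return nil
	})
}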

View File

@@ -959,7 +959,6 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
}
for _, containerClaimInfo := range containerClaimInfos {
var claimResources []*podresourcesapi.ClaimResource
containerClaimInfo.RLock()
// TODO: Currently we maintain a list of ClaimResources, each of which contains
// a set of CDIDevices from a different kubelet plugin. In the future we may want to
// include the name of the kubelet plugin and/or other types of resources that are
@@ -971,7 +970,6 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
}
claimResources = append(claimResources, &podresourcesapi.ClaimResource{CDIDevices: cdiDevices})
}
containerClaimInfo.RUnlock()
containerDynamicResource := podresourcesapi.DynamicResource{
ClassName: containerClaimInfo.ClassName,
ClaimName: containerClaimInfo.ClaimName,

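The per-claim RLock/RUnlock calls above become unnecessary because GetContainerClaimInfos (changed later in this commit) now returns deep copies of the cached ClaimInfo objects, taken while holding the cache-level read lock, so this loop only reads data it owns. A tiny illustrative sketch of that copy-then-read hand-off, with simplified stand-in types:

package main

import (
	"fmt"
	"sync"
)

// claimInfo is a simplified stand-in for the kubelet's ClaimInfo.
type claimInfo struct {
	ClaimName  string
	CDIDevices map[string][]string
}

// deepCopy returns a copy that shares no mutable state with the original.
func (c *claimInfo) deepCopy() *claimInfo {
	out := &claimInfo{ClaimName: c.ClaimName, CDIDevices: make(map[string][]string, len(c.CDIDevices))}
	for plugin, devices := range c.CDIDevices {
		out.CDIDevices[plugin] = append([]string(nil), devices...)
	}
	return out
}

func main() {
	var mu sync.RWMutex
	cached := &claimInfo{
		ClaimName:  "gpu-claim",
		CDIDevices: map[string][]string{"driver.example.com": {"vendor.com/gpu=0"}},
	}

	// Producer side: copy while holding the read lock.
	mu.RLock()
	snapshot := cached.deepCopy()
	mu.RUnlock()

	// Consumer side: no per-entry locking needed, the snapshot is private.
	for plugin, devices := range snapshot.CDIDevices {
		fmt.Println(plugin, devices)
	}
}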
View File

@@ -30,8 +30,8 @@ import (
// ClaimInfo holds information required
// to prepare and unprepare a resource claim.
// +k8s:deepcopy-gen=true
type ClaimInfo struct {
sync.RWMutex
state.ClaimInfoState
// annotations is a mapping of container annotations per DRA plugin associated with
// a prepared resource
@@ -39,24 +39,57 @@ type ClaimInfo struct {
prepared bool
}
func (info *ClaimInfo) addPodReference(podUID types.UID) {
info.Lock()
defer info.Unlock()
info.PodUIDs.Insert(string(podUID))
// claimInfoCache is a cache of processed resource claims keyed by namespace + claim name.
type claimInfoCache struct {
sync.RWMutex
state state.CheckpointState
claimInfo map[string]*ClaimInfo
}
func (info *ClaimInfo) deletePodReference(podUID types.UID) {
info.Lock()
defer info.Unlock()
info.PodUIDs.Delete(string(podUID))
// newClaimInfoFromClaim creates a new claim info from a resource claim.
func newClaimInfoFromClaim(claim *resourcev1alpha2.ResourceClaim) *ClaimInfo {
// Grab the allocation.resourceHandles. If there are no
// allocation.resourceHandles, create a single resourceHandle with no
// content. This will trigger processing of this claim by a single
// kubelet plugin whose name matches resourceClaim.Status.DriverName.
resourceHandles := claim.Status.Allocation.ResourceHandles
if len(resourceHandles) == 0 {
resourceHandles = make([]resourcev1alpha2.ResourceHandle, 1)
}
claimInfoState := state.ClaimInfoState{
DriverName: claim.Status.DriverName,
ClassName: claim.Spec.ResourceClassName,
ClaimUID: claim.UID,
ClaimName: claim.Name,
Namespace: claim.Namespace,
PodUIDs: sets.New[string](),
ResourceHandles: resourceHandles,
CDIDevices: make(map[string][]string),
}
info := &ClaimInfo{
ClaimInfoState: claimInfoState,
annotations: make(map[string][]kubecontainer.Annotation),
prepared: false,
}
return info
}
func (info *ClaimInfo) addCDIDevices(pluginName string, cdiDevices []string) error {
info.Lock()
defer info.Unlock()
// newClaimInfoFromState creates a new claim info from a checkpointed claim info state object.
func newClaimInfoFromState(state *state.ClaimInfoState) *ClaimInfo {
info := &ClaimInfo{
ClaimInfoState: *state.DeepCopy(),
annotations: make(map[string][]kubecontainer.Annotation),
prepared: false,
}
for pluginName, devices := range info.CDIDevices {
annotations, _ := cdi.GenerateAnnotations(info.ClaimUID, info.DriverName, devices)
info.annotations[pluginName] = append(info.annotations[pluginName], annotations...)
}
return info
}
// setCDIDevices adds a set of CDI devices to the claim info.
func (info *ClaimInfo) setCDIDevices(pluginName string, cdiDevices []string) error {
// NOTE: Passing CDI device names as annotations is a temporary solution
// It will be removed after all runtimes are updated
// to get CDI device names from the ContainerConfig.CDIDevices field
@@ -77,9 +110,6 @@ func (info *ClaimInfo) addCDIDevices(pluginName string, cdiDevices []string) err
// annotationsAsList returns container annotations as a single list.
func (info *ClaimInfo) annotationsAsList() []kubecontainer.Annotation {
info.RLock()
defer info.RUnlock()
var lst []kubecontainer.Annotation
for _, v := range info.annotations {
lst = append(lst, v...)
@@ -87,53 +117,43 @@ func (info *ClaimInfo) annotationsAsList() []kubecontainer.Annotation {
return lst
}
// claimInfoCache is a cache of processed resource claims keyed by namespace + claim name.
type claimInfoCache struct {
sync.RWMutex
state state.CheckpointState
claimInfo map[string]*ClaimInfo
// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
func (info *ClaimInfo) cdiDevicesAsList() []kubecontainer.CDIDevice {
var cdiDevices []kubecontainer.CDIDevice
for _, devices := range info.CDIDevices {
for _, device := range devices {
cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: device})
}
}
return cdiDevices
}
func newClaimInfo(driverName, className string, claimUID types.UID, claimName, namespace string, podUIDs sets.Set[string], resourceHandles []resourcev1alpha2.ResourceHandle) *ClaimInfo {
claimInfoState := state.ClaimInfoState{
DriverName: driverName,
ClassName: className,
ClaimUID: claimUID,
ClaimName: claimName,
Namespace: namespace,
PodUIDs: podUIDs,
ResourceHandles: resourceHandles,
}
claimInfo := ClaimInfo{
ClaimInfoState: claimInfoState,
annotations: make(map[string][]kubecontainer.Annotation),
}
return &claimInfo
// addPodReference adds a pod reference to the claim info.
func (info *ClaimInfo) addPodReference(podUID types.UID) {
info.PodUIDs.Insert(string(podUID))
}
// newClaimInfoFromResourceClaim creates a new ClaimInfo object
func newClaimInfoFromResourceClaim(resourceClaim *resourcev1alpha2.ResourceClaim) *ClaimInfo {
// Grab the allocation.resourceHandles. If there are no
// allocation.resourceHandles, create a single resourceHandle with no
// content. This will trigger processing of this claim by a single
// kubelet plugin whose name matches resourceClaim.Status.DriverName.
resourceHandles := resourceClaim.Status.Allocation.ResourceHandles
if len(resourceHandles) == 0 {
resourceHandles = make([]resourcev1alpha2.ResourceHandle, 1)
}
return newClaimInfo(
resourceClaim.Status.DriverName,
resourceClaim.Spec.ResourceClassName,
resourceClaim.UID,
resourceClaim.Name,
resourceClaim.Namespace,
make(sets.Set[string]),
resourceHandles,
)
// hasPodReference checks if a pod reference exists in the claim info.
func (info *ClaimInfo) hasPodReference(podUID types.UID) bool {
return info.PodUIDs.Has(string(podUID))
}
// newClaimInfoCache is a function that returns an instance of the claimInfoCache.
// deletePodReference deletes a pod reference from the claim info.
func (info *ClaimInfo) deletePodReference(podUID types.UID) {
info.PodUIDs.Delete(string(podUID))
}
// setPrepared marks the claim info as prepared.
func (info *ClaimInfo) setPrepared() {
info.prepared = true
}
// isPrepared checks if claim info is prepared or not.
func (info *ClaimInfo) isPrepared() bool {
return info.prepared
}
// newClaimInfoCache creates a new claim info cache object, pre-populated from a checkpoint (if present).
func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error) {
stateImpl, err := state.NewCheckpointState(stateDir, checkpointName)
if err != nil {
@@ -151,45 +171,47 @@ func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error)
}
for _, entry := range curState {
info := newClaimInfo(
entry.DriverName,
entry.ClassName,
entry.ClaimUID,
entry.ClaimName,
entry.Namespace,
entry.PodUIDs,
entry.ResourceHandles,
)
for pluginName, cdiDevices := range entry.CDIDevices {
err := info.addCDIDevices(pluginName, cdiDevices)
if err != nil {
return nil, fmt.Errorf("failed to add CDIDevices to claimInfo %+v: %+v", info, err)
}
}
cache.add(info)
info := newClaimInfoFromState(&entry)
cache.claimInfo[info.ClaimName+info.Namespace] = info
}
return cache, nil
}
func (cache *claimInfoCache) add(res *ClaimInfo) {
// withLock runs a function while holding the claimInfoCache lock.
func (cache *claimInfoCache) withLock(f func() error) error {
cache.Lock()
defer cache.Unlock()
cache.claimInfo[res.ClaimName+res.Namespace] = res
return f()
}
func (cache *claimInfoCache) get(claimName, namespace string) *ClaimInfo {
// withRLock runs a function while holding the claimInfoCache rlock.
func (cache *claimInfoCache) withRLock(f func() error) error {
cache.RLock()
defer cache.RUnlock()
return cache.claimInfo[claimName+namespace]
return f()
}
func (cache *claimInfoCache) delete(claimName, namespace string) {
cache.Lock()
defer cache.Unlock()
// add adds a new claim info object into the claim info cache.
func (cache *claimInfoCache) add(info *ClaimInfo) *ClaimInfo {
cache.claimInfo[info.ClaimName+info.Namespace] = info
return info
}
// contains checks to see if a specific claim info object is already in the cache.
func (cache *claimInfoCache) contains(claimName, namespace string) bool {
_, exists := cache.claimInfo[claimName+namespace]
return exists
}
// get gets a specific claim info object from the cache.
func (cache *claimInfoCache) get(claimName, namespace string) (*ClaimInfo, bool) {
info, exists := cache.claimInfo[claimName+namespace]
return info, exists
}
// delete deletes a specific claim info object from the cache.
func (cache *claimInfoCache) delete(claimName, namespace string) {
delete(cache.claimInfo, claimName+namespace)
}
@@ -198,26 +220,19 @@ func (cache *claimInfoCache) delete(claimName, namespace string) {
// This function is used indirectly by the status manager
// to check if pod can enter termination status
func (cache *claimInfoCache) hasPodReference(UID types.UID) bool {
cache.RLock()
defer cache.RUnlock()
for _, claimInfo := range cache.claimInfo {
if claimInfo.PodUIDs.Has(string(UID)) {
if claimInfo.hasPodReference(UID) {
return true
}
}
return false
}
// syncToCheckpoint syncs the full claim info cache state to a checkpoint.
func (cache *claimInfoCache) syncToCheckpoint() error {
cache.RLock()
defer cache.RUnlock()
claimInfoStateList := make(state.ClaimInfoStateList, 0, len(cache.claimInfo))
for _, infoClaim := range cache.claimInfo {
claimInfoStateList = append(claimInfoStateList, infoClaim.ClaimInfoState)
}
return cache.state.Store(claimInfoStateList)
}

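Note that newClaimInfoFromState works on a DeepCopy of the checkpointed ClaimInfoState (the generated deepcopy code is added later in this commit), so the live cache never aliases the slice and map fields read back from the checkpoint. A small sketch of why that matters, using a simplified stand-in type:

package main

import "fmt"

// state is a simplified stand-in for a checkpointed ClaimInfoState.
type state struct {
	PodUIDs    map[string]struct{}
	CDIDevices map[string][]string
}

// deepCopy mirrors what the generated DeepCopy does for the real type.
func (s *state) deepCopy() *state {
	out := &state{
		PodUIDs:    make(map[string]struct{}, len(s.PodUIDs)),
		CDIDevices: make(map[string][]string, len(s.CDIDevices)),
	}
	for uid := range s.PodUIDs {
		out.PodUIDs[uid] = struct{}{}
	}
	for plugin, devices := range s.CDIDevices {
		out.CDIDevices[plugin] = append([]string(nil), devices...)
	}
	return out
}

func main() {
	checkpointed := &state{
		PodUIDs:    map[string]struct{}{"pod-a": {}},
		CDIDevices: map[string][]string{"driver.example.com": {"vendor.com/gpu=0"}},
	}

	// The cache works on a deep copy...
	live := checkpointed.deepCopy()
	live.PodUIDs["pod-b"] = struct{}{}

	// ...so mutating the live entry does not silently change what was
	// read back from the checkpoint.
	fmt.Println(len(checkpointed.PodUIDs), len(live.PodUIDs)) // 1 2
}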
View File

@@ -67,7 +67,7 @@ func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, n
// containerResources on success.
func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
batches := make(map[string][]*drapb.Claim)
claimInfos := make(map[types.UID]*ClaimInfo)
resourceClaims := make(map[types.UID]*resourceapi.ResourceClaim)
for i := range pod.Spec.ResourceClaims {
podClaim := &pod.Spec.ResourceClaims[i]
klog.V(3).InfoS("Processing resource", "podClaim", podClaim.Name, "pod", pod.Name)
@@ -108,48 +108,60 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
continue
}
claimInfo := m.cache.get(*claimName, pod.Namespace)
if claimInfo == nil {
// claim does not exist in cache, create new claimInfo object
// to be processed later.
claimInfo = newClaimInfoFromResourceClaim(resourceClaim)
}
// We delay checkpointing of this change until this call
// returns successfully. It is OK to do this because we
// will only return successfully from this call if the
// checkpoint has succeeded. That means if the kubelet is
// ever restarted before this checkpoint succeeds, the pod
// whose resources are being prepared would never have
// started, so it's OK (actually correct) to not include it
// in the cache.
claimInfo.addPodReference(pod.UID)
if claimInfo.prepared {
// Already prepared this claim, no need to prepare it again
continue
}
// Loop through all plugins and prepare for calling NodePrepareResources.
for _, resourceHandle := range claimInfo.ResourceHandles {
// If no DriverName is provided in the resourceHandle, we
// use the DriverName from the status
pluginName := resourceHandle.DriverName
if pluginName == "" {
pluginName = resourceClaim.Status.DriverName
// Atomically perform some operations on the claimInfo cache.
err = m.cache.withLock(func() error {
// Get a reference to the claim info for this claim from the cache.
// If there isn't one yet, then add it to the cache.
claimInfo, exists := m.cache.get(resourceClaim.Name, resourceClaim.Namespace)
if !exists {
claimInfo = m.cache.add(newClaimInfoFromClaim(resourceClaim))
}
claim := &drapb.Claim{
Namespace: resourceClaim.Namespace,
Uid: string(resourceClaim.UID),
Name: resourceClaim.Name,
ResourceHandle: resourceHandle.Data,
// Add a reference to the current pod in the claim info.
claimInfo.addPodReference(pod.UID)
// Checkpoint to ensure all claims we plan to prepare are tracked.
// If something goes wrong and the newly referenced pod gets
// deleted without a successful prepare call, we will catch
// that in the reconcile loop and take the appropriate action.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
if resourceHandle.StructuredData != nil {
claim.StructuredResourceHandle = []*resourceapi.StructuredResourceHandle{resourceHandle.StructuredData}
// If this claim is already prepared, there is no need to prepare it again.
if claimInfo.isPrepared() {
return nil
}
batches[pluginName] = append(batches[pluginName], claim)
// This saved claim will be used to update ClaimInfo cache
// after NodePrepareResources GRPC succeeds
resourceClaims[claimInfo.ClaimUID] = resourceClaim
// Loop through all plugins and prepare for calling NodePrepareResources.
for _, resourceHandle := range claimInfo.ResourceHandles {
// If no DriverName is provided in the resourceHandle, we
// use the DriverName from the status
pluginName := resourceHandle.DriverName
if pluginName == "" {
pluginName = claimInfo.DriverName
}
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
Uid: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
ResourceHandle: resourceHandle.Data,
}
if resourceHandle.StructuredData != nil {
claim.StructuredResourceHandle = []*resourceapi.StructuredResourceHandle{resourceHandle.StructuredData}
}
batches[pluginName] = append(batches[pluginName], claim)
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
claimInfos[resourceClaim.UID] = claimInfo
}
// Call NodePrepareResources for all claims in each batch.
@@ -175,34 +187,22 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
return fmt.Errorf("NodePrepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
}
claimInfo := claimInfos[types.UID(claimUID)]
claim := resourceClaims[types.UID(claimUID)]
// Add the CDI Devices returned by NodePrepareResources to
// the claimInfo object.
err = claimInfo.addCDIDevices(pluginName, result.GetCDIDevices())
// Add the prepared CDI devices to the claim info
err := m.cache.withLock(func() error {
info, exists := m.cache.get(claim.Name, claim.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
}
if err := info.setCDIDevices(pluginName, result.GetCDIDevices()); err != nil {
return fmt.Errorf("unable to add CDI devices for plugin %s of claim %s in namespace %s", pluginName, claim.Name, claim.Namespace)
}
return nil
})
if err != nil {
return fmt.Errorf("failed to add CDIDevices to claimInfo %+v: %+v", claimInfo, err)
return fmt.Errorf("locked cache operation: %w", err)
}
// mark claim as (successfully) prepared by manager, so next time we don't prepare it.
claimInfo.prepared = true
// TODO: We (re)add the claimInfo object to the cache and
// sync it to the checkpoint *after* the
// NodePrepareResources call has completed. This will cause
// issues if the kubelet gets restarted between
// NodePrepareResources and syncToCheckpoint. It will result
// in not calling NodeUnprepareResources for this claim
// because no claimInfo will be synced back to the cache
// for it after the restart. We need to resolve this issue
// before moving to beta.
m.cache.add(claimInfo)
}
// Checkpoint to reduce redundant calls to
// NodePrepareResources after a kubelet restart.
err = m.cache.syncToCheckpoint()
if err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state, err: %+v", err)
}
unfinished := len(claims) - len(response.Claims)
@@ -210,11 +210,30 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
return fmt.Errorf("NodePrepareResources left out %d claims", unfinished)
}
}
// Checkpoint to capture all of the previous addPodReference() calls.
err := m.cache.syncToCheckpoint()
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Mark all pod claims as prepared.
for _, claim := range resourceClaims {
info, exists := m.cache.get(claim.Name, claim.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
}
info.setPrepared()
}
// Checkpoint to ensure all prepared claims are tracked with their list
// of CDI devices attached.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state, err: %+v", err)
return fmt.Errorf("locked cache operation: %w", err)
}
return nil
}
@@ -277,21 +296,25 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
continue
}
claimInfo := m.cache.get(*claimName, pod.Namespace)
if claimInfo == nil {
return nil, fmt.Errorf("unable to get resource for namespace: %s, claim: %s", pod.Namespace, *claimName)
}
claimInfo.RLock()
claimAnnotations := claimInfo.annotationsAsList()
klog.V(3).InfoS("Add resource annotations", "claim", *claimName, "annotations", claimAnnotations)
annotations = append(annotations, claimAnnotations...)
for _, devices := range claimInfo.CDIDevices {
for _, device := range devices {
cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: device})
err := m.cache.withRLock(func() error {
claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
}
claimAnnotations := claimInfo.annotationsAsList()
klog.V(3).InfoS("Add resource annotations", "claim", *claimName, "annotations", claimAnnotations)
annotations = append(annotations, claimAnnotations...)
devices := claimInfo.cdiDevicesAsList()
klog.V(3).InfoS("Add CDI devices", "claim", *claimName, "CDI devices", devices)
cdiDevices = append(cdiDevices, devices...)
return nil
})
if err != nil {
return nil, fmt.Errorf("locked cache operation: %w", err)
}
claimInfo.RUnlock()
}
}
@@ -303,60 +326,79 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
// As such, calls to the underlying NodeUnprepareResource API are skipped for claims that have
// already been successfully unprepared.
func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
batches := make(map[string][]*drapb.Claim)
claimInfos := make(map[types.UID]*ClaimInfo)
var claimNames []string
for i := range pod.Spec.ResourceClaims {
claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
if err != nil {
return fmt.Errorf("unprepare resource claim: %v", err)
}
// The claim name might be nil if no underlying resource claim
// was generated for the referenced claim. There are valid use
// cases when this might happen, so we simply skip it.
if claimName == nil {
continue
}
claimNames = append(claimNames, *claimName)
}
return m.unprepareResources(pod.UID, pod.Namespace, claimNames)
}
claimInfo := m.cache.get(*claimName, pod.Namespace)
func (m *ManagerImpl) unprepareResources(podUID types.UID, namespace string, claimNames []string) error {
batches := make(map[string][]*drapb.Claim)
claimNamesMap := make(map[types.UID]string)
for _, claimName := range claimNames {
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Get the claim info from the cache
claimInfo, exists := m.cache.get(claimName, namespace)
// Skip calling NodeUnprepareResource if claim info is not cached
if claimInfo == nil {
continue
}
// Skip calling NodeUnprepareResource if other pods are still referencing it
if len(claimInfo.PodUIDs) > 1 {
// We delay checkpointing of this change until this call returns successfully.
// It is OK to do this because we will only return successfully from this call if
// the checkpoint has succeeded. That means if the kubelet is ever restarted
// before this checkpoint succeeds, we will simply call into this (idempotent)
// function again.
claimInfo.deletePodReference(pod.UID)
continue
}
// Loop through all plugins and prepare for calling NodeUnprepareResources.
for _, resourceHandle := range claimInfo.ResourceHandles {
// If no DriverName is provided in the resourceHandle, we
// use the DriverName from the status
pluginName := resourceHandle.DriverName
if pluginName == "" {
pluginName = claimInfo.DriverName
// Skip calling NodeUnprepareResource if claim info is not cached
if !exists {
return nil
}
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
Uid: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
ResourceHandle: resourceHandle.Data,
// Skip calling NodeUnprepareResource if other pods are still referencing it
if len(claimInfo.PodUIDs) > 1 {
// We delay checkpointing of this change until
// UnprepareResources returns successfully. It is OK to do
// this because we will only return successfully from this call
// if the checkpoint has succeeded. That means if the kubelet
// is ever restarted before this checkpoint succeeds, we will
// simply call into this (idempotent) function again.
claimInfo.deletePodReference(podUID)
return nil
}
if resourceHandle.StructuredData != nil {
claim.StructuredResourceHandle = []*resourceapi.StructuredResourceHandle{resourceHandle.StructuredData}
// This claimInfo name will be used to update ClaimInfo cache
// after NodeUnprepareResources GRPC succeeds
claimNamesMap[claimInfo.ClaimUID] = claimInfo.ClaimName
// Loop through all plugins and prepare for calling NodeUnprepareResources.
for _, resourceHandle := range claimInfo.ResourceHandles {
// If no DriverName is provided in the resourceHandle, we
// use the DriverName from the status
pluginName := resourceHandle.DriverName
if pluginName == "" {
pluginName = claimInfo.DriverName
}
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
Uid: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
ResourceHandle: resourceHandle.Data,
}
if resourceHandle.StructuredData != nil {
claim.StructuredResourceHandle = []*resourceapi.StructuredResourceHandle{resourceHandle.StructuredData}
}
batches[pluginName] = append(batches[pluginName], claim)
}
batches[pluginName] = append(batches[pluginName], claim)
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
claimInfos[claimInfo.ClaimUID] = claimInfo
}
// Call NodeUnprepareResources for all claims in each batch.
@@ -382,20 +424,6 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
if result.GetError() != "" {
return fmt.Errorf("NodeUnprepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
}
// Delete last pod UID only if unprepare succeeds.
// This ensures that the status manager doesn't enter termination status
// for the pod. This logic is implemented in
// m.PodMightNeedToUnprepareResources and claimInfo.hasPodReference.
claimInfo := claimInfos[types.UID(claimUID)]
claimInfo.deletePodReference(pod.UID)
m.cache.delete(claimInfo.ClaimName, pod.Namespace)
}
// Checkpoint to reduce redundant calls to NodeUnprepareResources after a kubelet restart.
err = m.cache.syncToCheckpoint()
if err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state, err: %+v", err)
}
unfinished := len(claims) - len(response.Claims)
@@ -404,21 +432,35 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
}
}
// Checkpoint to capture all of the previous deletePodReference() calls.
err := m.cache.syncToCheckpoint()
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Delete all claimInfos from the cache that have just been unprepared.
for _, claimName := range claimNamesMap {
m.cache.delete(claimName, namespace)
}
// Atomically sync the cache back to the checkpoint.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state, err: %+v", err)
return fmt.Errorf("locked cache operation: %w", err)
}
return nil
}
// PodMightNeedToUnprepareResources returns true if the pod might need to
// unprepare resources
func (m *ManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
m.cache.Lock()
defer m.cache.Unlock()
return m.cache.hasPodReference(UID)
}
// GetCongtainerClaimInfos gets Container's ClaimInfo
// GetContainerClaimInfos gets Container's ClaimInfo
func (m *ManagerImpl) GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error) {
claimInfos := make([]*ClaimInfo, 0, len(pod.Spec.ResourceClaims))
@@ -432,11 +474,18 @@ func (m *ManagerImpl) GetContainerClaimInfos(pod *v1.Pod, container *v1.Containe
if podResourceClaim.Name != claim.Name {
continue
}
claimInfo := m.cache.get(*claimName, pod.Namespace)
if claimInfo == nil {
return nil, fmt.Errorf("unable to get resource for namespace: %s, claim: %s", pod.Namespace, *claimName)
err := m.cache.withRLock(func() error {
claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
}
claimInfos = append(claimInfos, claimInfo.DeepCopy())
return nil
})
if err != nil {
return nil, fmt.Errorf("locked cache operation: %w", err)
}
claimInfos = append(claimInfos, claimInfo)
}
}
return claimInfos, nil

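One behavior worth calling out: a claim is only unprepared (and its cache entry deleted) once the last pod referencing it goes away; while other pods still reference the claim, UnprepareResources just drops the pod's UID from the claim info and returns. A minimal sketch of that reference-counting decision, with illustrative types only:

package main

import "fmt"

// claimInfo is a simplified stand-in carrying only the pod reference set.
type claimInfo struct {
	PodUIDs map[string]struct{}
}

// unprepareNeeded reports whether removing podUID should trigger
// NodeUnprepareResources: only when this pod holds the last reference.
func unprepareNeeded(info *claimInfo, podUID string) bool {
	if len(info.PodUIDs) > 1 {
		// Other pods still reference the claim: just drop this reference.
		delete(info.PodUIDs, podUID)
		return false
	}
	return true
}

func main() {
	info := &claimInfo{PodUIDs: map[string]struct{}{"pod-a": {}, "pod-b": {}}}

	fmt.Println(unprepareNeeded(info, "pod-a")) // false: pod-b still references the claim
	fmt.Println(unprepareNeeded(info, "pod-b")) // true: last reference, unprepare now
}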
View File

@@ -36,6 +36,7 @@ type CheckpointState interface {
}
// ClaimInfoState is used to store claim info state in a checkpoint
// +k8s:deepcopy-gen=true
type ClaimInfoState struct {
// Name of the DRA driver
DriverName string

View File

@@ -0,0 +1,72 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package state
import (
v1alpha2 "k8s.io/api/resource/v1alpha2"
sets "k8s.io/apimachinery/pkg/util/sets"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClaimInfoState) DeepCopyInto(out *ClaimInfoState) {
*out = *in
if in.PodUIDs != nil {
in, out := &in.PodUIDs, &out.PodUIDs
*out = make(sets.Set[string], len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.ResourceHandles != nil {
in, out := &in.ResourceHandles, &out.ResourceHandles
*out = make([]v1alpha2.ResourceHandle, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.CDIDevices != nil {
in, out := &in.CDIDevices, &out.CDIDevices
*out = make(map[string][]string, len(*in))
for key, val := range *in {
var outVal []string
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = make([]string, len(*in))
copy(*out, *in)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfoState.
func (in *ClaimInfoState) DeepCopy() *ClaimInfoState {
if in == nil {
return nil
}
out := new(ClaimInfoState)
in.DeepCopyInto(out)
return out
}

View File

@@ -0,0 +1,58 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package dra
import (
container "k8s.io/kubernetes/pkg/kubelet/container"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClaimInfo) DeepCopyInto(out *ClaimInfo) {
*out = *in
in.ClaimInfoState.DeepCopyInto(&out.ClaimInfoState)
if in.annotations != nil {
in, out := &in.annotations, &out.annotations
*out = make(map[string][]container.Annotation, len(*in))
for key, val := range *in {
var outVal []container.Annotation
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = make([]container.Annotation, len(*in))
copy(*out, *in)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfo.
func (in *ClaimInfo) DeepCopy() *ClaimInfo {
if in == nil {
return nil
}
out := new(ClaimInfo)
in.DeepCopyInto(out)
return out
}