kubelet dra: lock before getting claimInfo CDIDevices and annotations fields

Currently claimInfo CDIDevices and annotations access directly without RLock.
This can lead to concurrent read write error.

To avoid it we added RLock all before getting the CDIDevices and annotations

Signed-off-by: Moshe Levi <moshele@nvidia.com>
This commit is contained in:
Moshe Levi 2023-03-19 09:46:35 +02:00
parent 37937bb227
commit 04ad946e8f
2 changed files with 6 additions and 2 deletions

View File

@ -978,6 +978,7 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
}
for _, containerClaimInfo := range containerClaimInfos {
var claimResources []*podresourcesapi.ClaimResource
containerClaimInfo.RLock()
// TODO: Currently we maintain a list of ClaimResources, each of which contains
// a set of CDIDevices from a different kubelet plugin. In the future we may want to
// include the name of the kubelet plugin and/or other types of resources that are
@ -989,6 +990,7 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
}
claimResources = append(claimResources, &podresourcesapi.ClaimResource{CDIDevices: cdiDevices})
}
containerClaimInfo.RUnlock()
containerDynamicResource := podresourcesapi.DynamicResource{
ClassName: containerClaimInfo.ClassName,
ClaimName: containerClaimInfo.ClaimName,

View File

@ -202,6 +202,7 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
return nil, fmt.Errorf("unable to get resource for namespace: %s, claim: %s", pod.Namespace, claimName)
}
claimInfo.RLock()
klog.V(3).InfoS("Add resource annotations", "claim", claimName, "annotations", claimInfo.annotations)
annotations = append(annotations, claimInfo.annotations...)
for _, devices := range claimInfo.CDIDevices {
@ -209,6 +210,7 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: device})
}
}
claimInfo.RUnlock()
}
}
@ -282,8 +284,8 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
resourceHandle.Data)
if err != nil {
return fmt.Errorf(
"NodeUnprepareResource failed, pod: %s, claim UID: %s, claim name: %s, CDI devices: %s, err: %+v",
pod.Name, claimInfo.ClaimUID, claimInfo.ClaimName, claimInfo.CDIDevices, err)
"NodeUnprepareResource failed, pod: %s, claim UID: %s, claim name: %s, resource handle: %s, err: %+v",
pod.Name, claimInfo.ClaimUID, claimInfo.ClaimName, resourceHandle.Data, err)
}
klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response)
}