diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go index 807cc8afd3c..5633c1bfe60 100644 --- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go +++ b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go @@ -22,6 +22,7 @@ import ( "os" "path" "regexp" + "strings" "sync" "github.com/golang/glog" @@ -101,8 +102,7 @@ func (ngm *nvidiaGPUManager) Start() error { if err := ngm.discoverGPUs(); err != nil { return err } - // It's possible that the runtime isn't available now. - ngm.allocated = ngm.gpusInUse() + // We ignore errors when identifying allocated GPUs because it is possible that the runtime interfaces may not be logically up. return nil } @@ -239,7 +239,7 @@ func (ngm *nvidiaGPUManager) gpusInUse() *podGPUs { var containersToInspect []containerIdentifier for _, container := range pod.Status.ContainerStatuses { if containers.Has(container.Name) { - containersToInspect = append(containersToInspect, containerIdentifier{container.ContainerID, container.Name}) + containersToInspect = append(containersToInspect, containerIdentifier{strings.Replace(container.ContainerID, "docker://", "", 1), container.Name}) } } // add the pod and its containers that need to be inspected.