From f3b9874485718d8c4150b298fdd9bd7ad64e0e51 Mon Sep 17 00:00:00 2001 From: tianshapjq Date: Fri, 19 May 2017 16:47:10 +0800 Subject: [PATCH] gpusInUse info error when kubelet restarts --- pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go index 807cc8afd3c..5633c1bfe60 100644 --- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go +++ b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go @@ -22,6 +22,7 @@ import ( "os" "path" "regexp" + "strings" "sync" "github.com/golang/glog" @@ -101,8 +102,7 @@ func (ngm *nvidiaGPUManager) Start() error { if err := ngm.discoverGPUs(); err != nil { return err } - // It's possible that the runtime isn't available now. - ngm.allocated = ngm.gpusInUse() + // We ignore errors when identifying allocated GPUs because it is possible that the runtime interfaces may not be logically up. return nil } @@ -239,7 +239,7 @@ func (ngm *nvidiaGPUManager) gpusInUse() *podGPUs { var containersToInspect []containerIdentifier for _, container := range pod.Status.ContainerStatuses { if containers.Has(container.Name) { - containersToInspect = append(containersToInspect, containerIdentifier{container.ContainerID, container.Name}) + containersToInspect = append(containersToInspect, containerIdentifier{strings.Replace(container.ContainerID, "docker://", "", 1), container.Name}) } } // add the pod and its containers that need to be inspected.