diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
index 7d7176603e4..8f4b5c6437b 100644
--- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
+++ b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
@@ -48,7 +48,7 @@ const (
 
 type activePodsLister interface {
 	// Returns a list of active pods on the node.
-	GetRunningPods() ([]*v1.Pod, error)
+	GetActivePods() []*v1.Pod
 }
 
 // nvidiaGPUManager manages nvidia gpu devices.
@@ -148,9 +148,7 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 		ngm.allocated = allocated
 	} else {
 		// update internal list of GPUs in use prior to allocating new GPUs.
-		if err := ngm.updateAllocatedGPUs(); err != nil {
-			return nil, fmt.Errorf("Failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
-		}
+		ngm.updateAllocatedGPUs()
 	}
 	// Check if GPUs have already been allocated. If so return them right away.
 	// This can happen if a container restarts for example.
@@ -179,13 +177,10 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 }
 
 // updateAllocatedGPUs updates the list of GPUs in use.
-// It gets a list of running pods and then frees any GPUs that are bound to terminated pods.
+// It gets a list of active pods and then frees any GPUs that are bound to terminated pods.
 // Returns error on failure.
-func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
-	activePods, err := ngm.activePodsLister.GetRunningPods()
-	if err != nil {
-		return fmt.Errorf("Failed to list active pods: %v", err)
-	}
+func (ngm *nvidiaGPUManager) updateAllocatedGPUs() {
+	activePods := ngm.activePodsLister.GetActivePods()
 	activePodUids := sets.NewString()
 	for _, pod := range activePods {
 		activePodUids.Insert(string(pod.UID))
@@ -194,7 +189,6 @@ func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
 	podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
 	glog.V(5).Infof("pods to be removed: %v", podsToBeRemoved.List())
 	ngm.allocated.delete(podsToBeRemoved.List())
-	return nil
 }
 
 // discoverGPUs identifies allGPUs NVIDIA GPU devices available on the local node by walking `/dev` directory.
@@ -224,10 +218,7 @@ func (ngm *nvidiaGPUManager) discoverGPUs() error {
 
 // gpusInUse returns a list of GPUs in use along with the respective pods that are using it.
 func (ngm *nvidiaGPUManager) gpusInUse() (*podGPUs, error) {
-	pods, err := ngm.activePodsLister.GetRunningPods()
-	if err != nil {
-		return nil, err
-	}
+	pods := ngm.activePodsLister.GetActivePods()
 	type containerIdentifier struct {
 		id   string
 		name string
diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go
index e4565525e49..db80f03cf1e 100644
--- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go
+++ b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go
@@ -32,8 +32,8 @@ type testActivePodsLister struct {
 	activePods []*v1.Pod
 }
 
-func (tapl *testActivePodsLister) GetRunningPods() ([]*v1.Pod, error) {
-	return tapl.activePods, nil
+func (tapl *testActivePodsLister) GetActivePods() []*v1.Pod {
+	return tapl.activePods
 }
 
 func makeTestPod(numContainers, gpusPerContainer int) *v1.Pod {
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 14bcce91f8b..9584ef5f1b0 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -792,7 +792,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	klet.AddPodSyncLoopHandler(activeDeadlineHandler)
 	klet.AddPodSyncHandler(activeDeadlineHandler)
 
-	criticalPodAdmissionHandler := preemption.NewCriticalPodAdmissionHandler(klet.getActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeDeps.Recorder)
+	criticalPodAdmissionHandler := preemption.NewCriticalPodAdmissionHandler(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeDeps.Recorder)
 	klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay, criticalPodAdmissionHandler))
 	// apply functional Option's
 	for _, opt := range kubeDeps.Options {
@@ -1204,7 +1204,7 @@ func (kl *Kubelet) initializeModules() error {
 		return fmt.Errorf("Kubelet failed to get node info: %v", err)
 	}
 
-	if err := kl.containerManager.Start(node, kl.getActivePods); err != nil {
+	if err := kl.containerManager.Start(node, kl.GetActivePods); err != nil {
 		return fmt.Errorf("Failed to start ContainerManager %v", err)
 	}
 
@@ -1230,7 +1230,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
 		glog.Fatalf("Failed to start cAdvisor %v", err)
 	}
 	// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
-	kl.evictionManager.Start(kl, kl.getActivePods, kl, evictionMonitoringPeriod)
+	kl.evictionManager.Start(kl, kl.GetActivePods, kl, evictionMonitoringPeriod)
 }
 
 // Run starts the kubelet reacting to config updates
diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go
index d96627f9f7f..e1eda16586a 100644
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -76,8 +76,8 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
 	return pods, nil
 }
 
-// getActivePods returns non-terminal pods
-func (kl *Kubelet) getActivePods() []*v1.Pod {
+// GetActivePods returns non-terminal pods
+func (kl *Kubelet) GetActivePods() []*v1.Pod {
 	allPods := kl.podManager.GetPods()
 	activePods := kl.filterOutTerminatedPods(allPods)
 	return activePods
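For illustration only (not part of the patch): a minimal, self-contained Go sketch of the new synchronous activePodsLister contract introduced above. The Pod struct below is a stand-in for v1.Pod, and the fake lister mirrors testActivePodsLister from the updated unit test; everything else is hypothetical scaffolding to show that callers such as updateAllocatedGPUs and gpusInUse no longer need to handle an error when listing pods.

package main

import "fmt"

// Pod is a stand-in for v1.Pod, used only to keep this sketch self-contained.
type Pod struct {
	UID string
}

// activePodsLister mirrors the updated interface: GetActivePods returns the
// list directly, with no error to propagate.
type activePodsLister interface {
	GetActivePods() []*Pod
}

// testActivePodsLister mirrors the fake lister from nvidia_gpu_manager_test.go.
type testActivePodsLister struct {
	activePods []*Pod
}

func (tapl *testActivePodsLister) GetActivePods() []*Pod {
	return tapl.activePods
}

func main() {
	var lister activePodsLister = &testActivePodsLister{
		activePods: []*Pod{{UID: "pod-1"}, {UID: "pod-2"}},
	}
	// A caller consumes the active pod list directly instead of checking an error.
	for _, pod := range lister.GetActivePods() {
		fmt.Println("active pod:", pod.UID)
	}
}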