use active pods instead of runtime pods in gpu manager

Signed-off-by: Vishnu kannan <vishnuk@google.com>
Vishnu kannan 2017-03-11 10:43:24 -08:00
parent 8ed9bff073
commit ff158090b3
4 changed files with 13 additions and 22 deletions


@@ -48,7 +48,7 @@ const (
 type activePodsLister interface {
 	// Returns a list of active pods on the node.
-	GetRunningPods() ([]*v1.Pod, error)
+	GetActivePods() []*v1.Pod
 }
 
 // nvidiaGPUManager manages nvidia gpu devices.
@@ -148,9 +148,7 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 		ngm.allocated = allocated
 	} else {
 		// update internal list of GPUs in use prior to allocating new GPUs.
-		if err := ngm.updateAllocatedGPUs(); err != nil {
-			return nil, fmt.Errorf("Failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
-		}
+		ngm.updateAllocatedGPUs()
 	}
 	// Check if GPUs have already been allocated. If so return them right away.
 	// This can happen if a container restarts for example.
@@ -179,13 +177,10 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 }
 
 // updateAllocatedGPUs updates the list of GPUs in use.
-// It gets a list of running pods and then frees any GPUs that are bound to terminated pods.
-// Returns error on failure.
-func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
-	activePods, err := ngm.activePodsLister.GetRunningPods()
-	if err != nil {
-		return fmt.Errorf("Failed to list active pods: %v", err)
-	}
+// It gets a list of active pods and then frees any GPUs that are bound to terminated pods.
+func (ngm *nvidiaGPUManager) updateAllocatedGPUs() {
+	activePods := ngm.activePodsLister.GetActivePods()
 	activePodUids := sets.NewString()
 	for _, pod := range activePods {
 		activePodUids.Insert(string(pod.UID))
@@ -194,7 +189,6 @@ func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
 	podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
 	glog.V(5).Infof("pods to be removed: %v", podsToBeRemoved.List())
 	ngm.allocated.delete(podsToBeRemoved.List())
-	return nil
 }
 
 // discoverGPUs identifies all NVIDIA GPU devices available on the local node by walking the `/dev` directory.
@@ -224,10 +218,7 @@ func (ngm *nvidiaGPUManager) discoverGPUs() error {
 
 // gpusInUse returns a list of GPUs in use along with the respective pods that are using it.
 func (ngm *nvidiaGPUManager) gpusInUse() (*podGPUs, error) {
-	pods, err := ngm.activePodsLister.GetRunningPods()
-	if err != nil {
-		return nil, err
-	}
+	pods := ngm.activePodsLister.GetActivePods()
 	type containerIdentifier struct {
 		id   string
 		name string

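The substantive change in this file is that updateAllocatedGPUs no longer queries the runtime for running pods (a call that could fail) but reads the kubelet's in-memory active-pod list, so the method loses its error return. The reclamation itself is a set difference: any pod UID that holds GPUs but is absent from the active list gets its devices freed. A minimal sketch of that step, assuming plain Go maps in place of Kubernetes' sets.String (reclaimGPUs and the map layout are illustrative, not from the commit):

package main

import "fmt"

// reclaimGPUs mirrors allocatedPodUids.Difference(activePodUids) followed
// by ngm.allocated.delete(...): drop entries for pods no longer active.
func reclaimGPUs(allocated map[string][]string, activePodUIDs []string) {
	active := make(map[string]bool, len(activePodUIDs))
	for _, uid := range activePodUIDs {
		active[uid] = true
	}
	for uid := range allocated {
		if !active[uid] {
			delete(allocated, uid) // free this pod's devices
		}
	}
}

func main() {
	allocated := map[string][]string{
		"pod-a": {"/dev/nvidia0"},
		"pod-b": {"/dev/nvidia1"},
	}
	reclaimGPUs(allocated, []string{"pod-a"}) // pod-b has terminated
	fmt.Println(allocated)                    // map[pod-a:[/dev/nvidia0]]
}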

@@ -32,8 +32,8 @@ type testActivePodsLister struct {
 	activePods []*v1.Pod
 }
 
-func (tapl *testActivePodsLister) GetRunningPods() ([]*v1.Pod, error) {
-	return tapl.activePods, nil
+func (tapl *testActivePodsLister) GetActivePods() []*v1.Pod {
+	return tapl.activePods
 }
 
 func makeTestPod(numContainers, gpusPerContainer int) *v1.Pod {

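Because the stub no longer returns an error, tests can feed the manager a fixed pod list without any error plumbing. A hypothetical test exercising the stub (the package name and the assertion are assumptions, not taken from the commit's test file):

package nvidia

import "testing"

func TestGetActivePodsStub(t *testing.T) {
	// With the error return gone, the fixture is a single literal.
	lister := &testActivePodsLister{activePods: nil}
	if got := lister.GetActivePods(); len(got) != 0 {
		t.Fatalf("expected no active pods, got %d", len(got))
	}
}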

@@ -792,7 +792,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	klet.AddPodSyncLoopHandler(activeDeadlineHandler)
 	klet.AddPodSyncHandler(activeDeadlineHandler)
-	criticalPodAdmissionHandler := preemption.NewCriticalPodAdmissionHandler(klet.getActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeDeps.Recorder)
+	criticalPodAdmissionHandler := preemption.NewCriticalPodAdmissionHandler(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeDeps.Recorder)
 	klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay, criticalPodAdmissionHandler))
 
 	// apply functional Options
 	for _, opt := range kubeDeps.Options {
@@ -1204,7 +1204,7 @@ func (kl *Kubelet) initializeModules() error {
 		return fmt.Errorf("Kubelet failed to get node info: %v", err)
 	}
 
-	if err := kl.containerManager.Start(node, kl.getActivePods); err != nil {
+	if err := kl.containerManager.Start(node, kl.GetActivePods); err != nil {
 		return fmt.Errorf("Failed to start ContainerManager %v", err)
 	}
@@ -1230,7 +1230,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
 		glog.Fatalf("Failed to start cAdvisor %v", err)
 	}
 	// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
-	kl.evictionManager.Start(kl, kl.getActivePods, kl, evictionMonitoringPeriod)
+	kl.evictionManager.Start(kl, kl.GetActivePods, kl, evictionMonitoringPeriod)
 }
 
 // Run starts the kubelet reacting to config updates

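These call sites switch from kl.getActivePods to the exported kl.GetActivePods, defined in the next file. The export is what lets *Kubelet satisfy the GPU manager's activePodsLister interface across package boundaries: in Go, an interface method with an unexported name can only be satisfied by types in the interface's own package. A self-contained sketch of that rule, with cut-down stand-in types (Pod and countActive are illustrative):

package main

import "fmt"

type Pod struct{ Name string }

// activePodsLister mirrors the interface in the GPU manager's package;
// its method name must be exported for a foreign type to implement it.
type activePodsLister interface {
	GetActivePods() []*Pod
}

type Kubelet struct{ pods []*Pod }

// Exported, so *Kubelet implements activePodsLister even when that
// interface is declared in a different package.
func (kl *Kubelet) GetActivePods() []*Pod { return kl.pods }

func countActive(l activePodsLister) int { return len(l.GetActivePods()) }

func main() {
	kl := &Kubelet{pods: []*Pod{{Name: "gpu-pod"}}}
	fmt.Println(countActive(kl)) // 1
}

The same method value also still works as a plain callback, which is why containerManager.Start, the eviction manager, and the preemption handler above can take kl.GetActivePods directly.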

@@ -76,8 +76,8 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
 	return pods, nil
 }
 
-// getActivePods returns non-terminal pods
-func (kl *Kubelet) getActivePods() []*v1.Pod {
+// GetActivePods returns non-terminal pods
+func (kl *Kubelet) GetActivePods() []*v1.Pod {
 	allPods := kl.podManager.GetPods()
 	activePods := kl.filterOutTerminatedPods(allPods)
 	return activePods
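
Here "non-terminal" means the pod has not finished: terminal pods are filtered out before anything counts their resources. A simplified sketch of the filtering idea, assuming a phase-only check (the kubelet's real filterOutTerminatedPods also considers deletion state; the types below are cut-down stand-ins for the v1 API):

package main

import "fmt"

type PodPhase string

const (
	PodRunning   PodPhase = "Running"
	PodSucceeded PodPhase = "Succeeded"
	PodFailed    PodPhase = "Failed"
)

type Pod struct {
	Name  string
	Phase PodPhase
}

// filterOutTerminated keeps pods that have not reached a terminal phase.
func filterOutTerminated(pods []*Pod) []*Pod {
	var active []*Pod
	for _, p := range pods {
		if p.Phase != PodSucceeded && p.Phase != PodFailed {
			active = append(active, p)
		}
	}
	return active
}

func main() {
	pods := []*Pod{
		{Name: "a", Phase: PodRunning},
		{Name: "b", Phase: PodSucceeded},
	}
	fmt.Println(len(filterOutTerminated(pods))) // 1
}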