Device Manager - Refactor allocatePodResources

- allocatePodResources logic altered to allow for container by container
device allocation.
- New type PodReusableDevices
- New field in devicemanager devicesToReuse
This commit is contained in:
nolancon 2020-02-07 08:49:59 +00:00
parent 0a9bd0334d
commit cb9fdc49db
2 changed files with 34 additions and 40 deletions

View File

@ -105,6 +105,10 @@ type ManagerImpl struct {
// Store of Topology Affinities that the Device Manager can query. // Store of Topology Affinities that the Device Manager can query.
topologyAffinityStore topologymanager.Store topologyAffinityStore topologymanager.Store
// devicesToReuse contains devices that can be reused as they have been allocated to
// init containers.
devicesToReuse PodReusableDevices
} }
type endpointInfo struct { type endpointInfo struct {
@ -114,6 +118,9 @@ type endpointInfo struct {
type sourcesReadyStub struct{} type sourcesReadyStub struct{}
// PodReusableDevices is a map keyed by pod UID of devices to reuse.
type PodReusableDevices map[string]map[string]sets.String
func (s *sourcesReadyStub) AddSource(source string) {} func (s *sourcesReadyStub) AddSource(source string) {}
func (s *sourcesReadyStub) AllReady() bool { return true } func (s *sourcesReadyStub) AllReady() bool { return true }
@ -147,6 +154,7 @@ func newManagerImpl(socketPath string, numaNodeInfo cputopology.NUMANodeInfo, to
podDevices: make(podDevices), podDevices: make(podDevices),
numaNodes: numaNodes, numaNodes: numaNodes,
topologyAffinityStore: topologyAffinityStore, topologyAffinityStore: topologyAffinityStore,
devicesToReuse: make(PodReusableDevices),
} }
manager.callback = manager.genericDeviceUpdateCallback manager.callback = manager.genericDeviceUpdateCallback
@ -350,54 +358,39 @@ func (m *ManagerImpl) isVersionCompatibleWithPlugin(versions []string) bool {
return false return false
} }
func (m *ManagerImpl) allocatePodResources(pod *v1.Pod) error {
devicesToReuse := make(map[string]sets.String)
for _, container := range pod.Spec.InitContainers {
if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
return err
}
m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
}
for _, container := range pod.Spec.Containers {
if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
return err
}
m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
}
return nil
}
// Allocate is the call that you can use to allocate a set of devices // Allocate is the call that you can use to allocate a set of devices
// from the registered device plugins. // from the registered device plugins.
func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error { func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error {
// TODO: This function does not yet do what it is supposed to. The call to if _, ok := m.devicesToReuse[string(pod.UID)]; !ok {
// allocatePodResources() below is still allocating devices to a pod all at m.devicesToReuse[string(pod.UID)] = make(map[string]sets.String)
// once. We need to unroll this logic to allow it to allocate devices on a }
// container-by-container basis instead. The main challenge will be // If pod entries to m.devicesToReuse other than the current pod exist, delete them.
// ensuring that we "reuse" devices from init containers when allocating for podUID := range m.devicesToReuse {
// devices to app containers (just as the logic inside if podUID != string(pod.UID) {
// allocatePodResources() currently does). The hard part being that we will delete(m.devicesToReuse, podUID)
// need to maintain the 'devicesToReuse' present in allocatePodResources() }
// across invocations of Allocate(). }
// // Allocate resources for init containers first as we know the caller always loops
// My initial inclination to solve this with the least code churn is: // through init containers before looping through app containers. Should the caller
// 1) Create a new type called PodReusableDevices, defined as: // ever change those semantics, this logic will need to be amended.
// type PodReusableDevices map[string]map[string]sets.String for _, initContainer := range pod.Spec.InitContainers {
// 2) Instantiate a PodReusableDevices map as a new field of the if container.Name == initContainer.Name {
// devicemanager called devicesToReuse (similar to the local if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
// devicesToReuse variable currently in allocatePodResources) return err
// 3) Use devicesToReuse[string(pod.UID)] just as devicesToReuse is used for _, initContainer := range pod.Spec.InitContainers {
// today, being careful to create / destroy the nested maps where m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
// appropriate. return nil
}
err := m.allocatePodResources(pod) }
if err != nil { if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
klog.Errorf("Failed to allocate device plugin resource for pod %s, container %s: %v", string(pod.UID), container.Name, err)
return err return err
} }
m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
return nil return nil
} }
// UpdatePluginResources updates node resources based on devices already allocated to pods.
func (m *ManagerImpl) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error { func (m *ManagerImpl) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
pod := attrs.Pod pod := attrs.Pod

View File

@ -605,6 +605,7 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
allocatedDevices: make(map[string]sets.String), allocatedDevices: make(map[string]sets.String),
endpoints: make(map[string]endpointInfo), endpoints: make(map[string]endpointInfo),
podDevices: make(podDevices), podDevices: make(podDevices),
devicesToReuse: make(PodReusableDevices),
topologyAffinityStore: topologymanager.NewFakeManager(), topologyAffinityStore: topologymanager.NewFakeManager(),
activePods: activePods, activePods: activePods,
sourcesReady: &sourcesReadyStub{}, sourcesReady: &sourcesReadyStub{},