mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 07:47:56 +00:00
Device Manager - Refactor allocatePodResources
- allocatePodResources logic altered to allow for container-by-container device allocation.
- New type PodReusableDevices.
- New field devicesToReuse in the device manager.
This commit is contained in:
parent
0a9bd0334d
commit
cb9fdc49db
@ -105,6 +105,10 @@ type ManagerImpl struct {
|
|||||||
|
|
||||||
// Store of Topology Affinities that the Device Manager can query.
|
// Store of Topology Affinities that the Device Manager can query.
|
||||||
topologyAffinityStore topologymanager.Store
|
topologyAffinityStore topologymanager.Store
|
||||||
|
|
||||||
|
// devicesToReuse contains devices that can be reused as they have been allocated to
|
||||||
|
// init containers.
|
||||||
|
devicesToReuse PodReusableDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
type endpointInfo struct {
|
type endpointInfo struct {
|
||||||
@ -114,6 +118,9 @@ type endpointInfo struct {
|
|||||||
|
|
||||||
type sourcesReadyStub struct{}
|
type sourcesReadyStub struct{}
|
||||||
|
|
||||||
|
// PodReusableDevices is a map by pod UID of devices to reuse.
|
||||||
|
type PodReusableDevices map[string]map[string]sets.String
|
||||||
|
|
||||||
func (s *sourcesReadyStub) AddSource(source string) {}
|
func (s *sourcesReadyStub) AddSource(source string) {}
|
||||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||||
|
|
||||||
@ -147,6 +154,7 @@ func newManagerImpl(socketPath string, numaNodeInfo cputopology.NUMANodeInfo, to
|
|||||||
podDevices: make(podDevices),
|
podDevices: make(podDevices),
|
||||||
numaNodes: numaNodes,
|
numaNodes: numaNodes,
|
||||||
topologyAffinityStore: topologyAffinityStore,
|
topologyAffinityStore: topologyAffinityStore,
|
||||||
|
devicesToReuse: make(PodReusableDevices),
|
||||||
}
|
}
|
||||||
manager.callback = manager.genericDeviceUpdateCallback
|
manager.callback = manager.genericDeviceUpdateCallback
|
||||||
|
|
||||||
@ -350,54 +358,39 @@ func (m *ManagerImpl) isVersionCompatibleWithPlugin(versions []string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ManagerImpl) allocatePodResources(pod *v1.Pod) error {
|
|
||||||
devicesToReuse := make(map[string]sets.String)
|
|
||||||
for _, container := range pod.Spec.InitContainers {
|
|
||||||
if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
|
|
||||||
}
|
|
||||||
for _, container := range pod.Spec.Containers {
|
|
||||||
if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate is the call that you can use to allocate a set of devices
|
// Allocate is the call that you can use to allocate a set of devices
|
||||||
// from the registered device plugins.
|
// from the registered device plugins.
|
||||||
func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error {
|
func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error {
|
||||||
// TODO: This function does not yet do what it is supposed to. The call to
|
if _, ok := m.devicesToReuse[string(pod.UID)]; !ok {
|
||||||
// allocatePodResources() below is still allocating devices to a pod all at
|
m.devicesToReuse[string(pod.UID)] = make(map[string]sets.String)
|
||||||
// once. We need to unroll this logic to allow it to allocate devices on a
|
}
|
||||||
// container-by-container basis instead. The main challenge will be
|
// If pod entries to m.devicesToReuse other than the current pod exist, delete them.
|
||||||
// ensuring that we "reuse" devices from init containers when allocating
|
for podUID := range m.devicesToReuse {
|
||||||
// devices to app containers (just as the logic inside
|
if podUID != string(pod.UID) {
|
||||||
// allocatePodResources() currently does). The hard part being that we will
|
delete(m.devicesToReuse, podUID)
|
||||||
// need to maintain the 'devicesToReuse' present in allocatePodResources()
|
}
|
||||||
// across invocations of Allocate().
|
}
|
||||||
//
|
// Allocate resources for init containers first as we know the caller always loops
|
||||||
// My initial inclination to solve this with the least code churn is:
|
// through init containers before looping through app containers. Should the caller
|
||||||
// 1) Create a new type called PodReusableDevices, defined as:
|
// ever change those semantics, this logic will need to be amended.
|
||||||
// type PodReusableDevices map[string]map[string]sets.String
|
for _, initContainer := range pod.Spec.InitContainers {
|
||||||
// 2) Instantiate a PodReusableDevices map as a new field of the
|
if container.Name == initContainer.Name {
|
||||||
// devicemanager called devicesToReuse (similar to the local
|
if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
|
||||||
// devicesToReuse variable currently in allocatePodResources)
|
return err
|
||||||
// 3) Use devicesToReuse[string(pod.UID)] just as devicesToReuse is used
|
}
|
||||||
// today, being careful to create / destroy the nested maps where
|
m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
|
||||||
// appropriate.
|
return nil
|
||||||
|
}
|
||||||
err := m.allocatePodResources(pod)
|
}
|
||||||
if err != nil {
|
if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
|
||||||
klog.Errorf("Failed to allocate device plugin resource for pod %s, container %s: %v", string(pod.UID), container.Name, err)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdatePluginResources updates node resources based on devices already allocated to pods.
|
||||||
func (m *ManagerImpl) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
|
func (m *ManagerImpl) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
|
||||||
pod := attrs.Pod
|
pod := attrs.Pod
|
||||||
|
|
||||||
|
@ -605,6 +605,7 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
|
|||||||
allocatedDevices: make(map[string]sets.String),
|
allocatedDevices: make(map[string]sets.String),
|
||||||
endpoints: make(map[string]endpointInfo),
|
endpoints: make(map[string]endpointInfo),
|
||||||
podDevices: make(podDevices),
|
podDevices: make(podDevices),
|
||||||
|
devicesToReuse: make(PodReusableDevices),
|
||||||
topologyAffinityStore: topologymanager.NewFakeManager(),
|
topologyAffinityStore: topologymanager.NewFakeManager(),
|
||||||
activePods: activePods,
|
activePods: activePods,
|
||||||
sourcesReady: &sourcesReadyStub{},
|
sourcesReady: &sourcesReadyStub{},
|
||||||
|
Loading…
Reference in New Issue
Block a user