diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 86f04c587d8..8539069587f 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -369,10 +369,30 @@ func (m *ManagerImpl) allocatePodResources(pod *v1.Pod) error { // Allocate is the call that you can use to allocate a set of devices // from the registered device plugins. -func (m *ManagerImpl) Allocate(pod *v1.Pod) error { +func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error { + // TODO: This function does not yet do what it is supposed to. The call to + // allocatePodResources() below is still allocating devices to a pod all at + // once. We need to unroll this logic to allow it to allocate devices on a + // container-by-container basis instead. The main challenge will be + // ensuring that we "reuse" devices from init containers when allocating + // devices to app containers (just as the logic inside + // allocatePodResources() currently does). The hard part being that we will + // need to maintain the 'devicesToReuse' present in allocatePodResources() + // across invocations of Allocate(). + // + // My initial inclination to solve this with the least code churn is: + // 1) Create a new type called PodReusableDevices, defined as: + // type PodReusableDevices map[string]map[string]sets.String + // 2) Instantiate a PodReusableDevices map as a new field of the + // devicemanager called devicesToReuse (similar to the local + // devicesToReuse variable currently in allocatePodResources) + // 3) Use devicesToReuse[string(pod.UID)] just as devicesToReuse is used + // today, being careful to create / destroy the nested maps where + // appropriate. 
+ err := m.allocatePodResources(pod) if err != nil { - klog.Errorf("Failed to allocate device plugin resource for pod %s: %v", string(pod.UID), err) + klog.Errorf("Failed to allocate device plugin resource for pod %s, container %s: %v", string(pod.UID), container.Name, err) return err } return nil @@ -864,8 +884,8 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co } } if needsReAllocate { - klog.V(2).Infof("needs re-allocate device plugin resources for pod %s", podUID) - if err := m.allocatePodResources(pod); err != nil { + klog.V(2).Infof("needs re-allocate device plugin resources for pod %s, container %s", podUID, container.Name) + if err := m.Allocate(pod, container); err != nil { return nil, err } } diff --git a/pkg/kubelet/cm/devicemanager/manager_stub.go b/pkg/kubelet/cm/devicemanager/manager_stub.go index 6cb6aed62ef..ed6fb41e58e 100644 --- a/pkg/kubelet/cm/devicemanager/manager_stub.go +++ b/pkg/kubelet/cm/devicemanager/manager_stub.go @@ -45,7 +45,7 @@ func (h *ManagerStub) Stop() error { } // Allocate simply returns nil. -func (h *ManagerStub) Allocate(pod *v1.Pod) error { +func (h *ManagerStub) Allocate(pod *v1.Pod, container *v1.Container) error { return nil } diff --git a/pkg/kubelet/cm/devicemanager/types.go b/pkg/kubelet/cm/devicemanager/types.go index 114b36cc7c7..9fcafe53ec4 100644 --- a/pkg/kubelet/cm/devicemanager/types.go +++ b/pkg/kubelet/cm/devicemanager/types.go @@ -34,12 +34,12 @@ type Manager interface { // Start starts device plugin registration service. Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error - // Allocate configures and assigns devices to a pod. From the requested - // device resources, Allocate will communicate with the owning device - // plugin to allow setup procedures to take place, and for the device - // plugin to provide runtime settings to use the device (environment - // variables, mount points and device files). 
- Allocate(pod *v1.Pod) error + // Allocate configures and assigns devices to a container in a pod. From + // the requested device resources, Allocate will communicate with the + // owning device plugin to allow setup procedures to take place, and for + // the device plugin to provide runtime settings to use the device + // (environment variables, mount points and device files). + Allocate(pod *v1.Pod, container *v1.Container) error // UpdatePluginResources updates node resources based on devices already // allocated to pods. The node object is provided for the device manager to