mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-29 06:27:05 +00:00)

Merge pull request #87759 from klueska/upstream-move-cpu-allocation-to-pod-admit

Guarantee aligned resources across containers

Commit ac32644d6e
@@ -25,7 +25,6 @@ import (
 	internalapi "k8s.io/cri-api/pkg/apis"
 	podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
-	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
@@ -111,8 +110,8 @@ type ContainerManager interface {
 	// due to node recreation.
 	ShouldResetExtendedResourceCapacity() bool

-	// GetTopologyPodAdmitHandler returns an instance of the TopologyManager for Pod Admission
-	GetTopologyPodAdmitHandler() topologymanager.Manager
+	// GetAllocateResourcesPodAdmitHandler returns an instance of a PodAdmitHandler responsible for allocating pod resources.
+	GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler

 	// UpdateAllocatedDevices frees any Devices that are bound to terminated pods.
 	UpdateAllocatedDevices()
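The handler returned by GetAllocateResourcesPodAdmitHandler() plugs into the kubelet's pod admission chain (see the kubelet.go hunk at the end of this diff). Below is a minimal sketch of how a caller might consume it, assuming an already constructed ContainerManager and pod; the function name and import paths are inferred here and are not part of the commit:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)

// admitWithAlignedResources sketches the intended use of the new interface method:
// the returned lifecycle.PodAdmitHandler allocates CPUs and devices for the pod
// (via the TopologyManager when its feature gate is enabled) before admission.
func admitWithAlignedResources(containerManager cm.ContainerManager, pod *v1.Pod) bool {
	handler := containerManager.GetAllocateResourcesPodAdmitHandler()
	result := handler.Admit(&lifecycle.PodAdmitAttributes{Pod: pod})
	// On failure, result.Reason is "UnexpectedAdmissionError" and result.Message
	// carries the underlying allocation error, as set in the hunks below.
	return result.Admit
}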
@@ -672,11 +672,51 @@ func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Containe
 }

 func (cm *containerManagerImpl) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
-	return cm.deviceManager.Allocate(node, attrs)
+	return cm.deviceManager.UpdatePluginResources(node, attrs)
 }

-func (cm *containerManagerImpl) GetTopologyPodAdmitHandler() topologymanager.Manager {
-	return cm.topologyManager
+func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
+	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) {
+		return cm.topologyManager
+	}
+	// TODO: we need to think about a better way to do this. This will work for
+	// now so long as we have only the cpuManager and deviceManager relying on
+	// allocations here. However, going forward it is not generalized enough to
+	// work as we add more and more hint providers that the TopologyManager
+	// needs to call Allocate() on (that may not be directly intstantiated
+	// inside this component).
+	return &resourceAllocator{cm.cpuManager, cm.deviceManager}
+}
+
+type resourceAllocator struct {
+	cpuManager    cpumanager.Manager
+	deviceManager devicemanager.Manager
+}
+
+func (m *resourceAllocator) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
+	pod := attrs.Pod
+
+	for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
+		err := m.deviceManager.Allocate(pod, &container)
+		if err != nil {
+			return lifecycle.PodAdmitResult{
+				Message: fmt.Sprintf("Allocate failed due to %v, which is unexpected", err),
+				Reason:  "UnexpectedAdmissionError",
+				Admit:   false,
+			}
+		}
+
+		err = m.cpuManager.Allocate(pod, &container)
+		if err != nil {
+			return lifecycle.PodAdmitResult{
+				Message: fmt.Sprintf("Allocate failed due to %v, which is unexpected", err),
+				Reason:  "UnexpectedAdmissionError",
+				Admit:   false,
+			}
+		}
+	}
+
+	return lifecycle.PodAdmitResult{Admit: true}
 }

 func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {
@@ -117,8 +117,8 @@ func (cm *containerManagerStub) ShouldResetExtendedResourceCapacity() bool {
 	return cm.shouldResetExtendedResourceCapacity
 }

-func (cm *containerManagerStub) GetTopologyPodAdmitHandler() topologymanager.Manager {
-	return nil
+func (cm *containerManagerStub) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
+	return topologymanager.NewFakeManager()
 }

 func (cm *containerManagerStub) UpdateAllocatedDevices() {
@@ -177,7 +177,7 @@ func (cm *containerManagerImpl) ShouldResetExtendedResourceCapacity() bool {
 	return false
 }

-func (cm *containerManagerImpl) GetTopologyPodAdmitHandler() topologymanager.Manager {
+func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
 	return nil
 }

@@ -55,6 +55,11 @@ type Manager interface {
 	// Start is called during Kubelet initialization.
 	Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error

+	// Called to trigger the allocation of CPUs to a container. This must be
+	// called at some point prior to the AddContainer() call for a container,
+	// e.g. at pod admission time.
+	Allocate(pod *v1.Pod, container *v1.Container) error
+
 	// AddContainer is called between container create and container start
 	// so that initial CPU affinity settings can be written through to the
 	// container runtime before the first process begins to execute.
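The comments above describe a two-phase protocol: Allocate() reserves CPUs for a container at pod admission time, and AddContainer() later writes the resulting cpuset through to the runtime once a container ID exists. The following is a rough sketch of that call order, assuming a cpumanager.Manager built elsewhere; the helper name, the loops, and the container-ID map are illustrative only, and the import path is inferred from the package aliases used in this diff:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
)

// allocateThenAdd illustrates the required ordering of the two calls.
func allocateThenAdd(cpuMgr cpumanager.Manager, pod *v1.Pod, containerIDs map[string]string) error {
	// Phase 1: at pod admission, reserve CPUs for every container.
	for i := range pod.Spec.Containers {
		if err := cpuMgr.Allocate(pod, &pod.Spec.Containers[i]); err != nil {
			return err // admission would be rejected at this point
		}
	}
	// Phase 2: after the runtime has created each container, push the cpuset
	// recorded during Allocate() down to the runtime.
	for i := range pod.Spec.Containers {
		c := &pod.Spec.Containers[i]
		if err := cpuMgr.AddContainer(pod, c, containerIDs[c.Name]); err != nil {
			return err
		}
	}
	return nil
}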
@@ -206,39 +211,33 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
 	return nil
 }

-func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) error {
+func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
 	m.Lock()
-	// Proactively remove CPUs from init containers that have already run.
-	// They are guaranteed to have run to completion before any other
-	// container is run.
-	for _, initContainer := range p.Spec.InitContainers {
-		if c.Name != initContainer.Name {
-			err := m.policyRemoveContainerByRef(string(p.UID), initContainer.Name)
-			if err != nil {
-				klog.Warningf("[cpumanager] unable to remove init container (pod: %s, container: %s, error: %v)", string(p.UID), initContainer.Name, err)
-			}
-		}
-	}
+	defer m.Unlock()

 	// Call down into the policy to assign this container CPUs if required.
-	err := m.policyAddContainer(p, c, containerID)
+	err := m.policy.Allocate(m.state, p, c)
 	if err != nil {
-		klog.Errorf("[cpumanager] AddContainer error: %v", err)
-		m.Unlock()
+		klog.Errorf("[cpumanager] Allocate error: %v", err)
 		return err
 	}

-	// Get the CPUs just assigned to the container (or fall back to the default
-	// CPUSet if none were assigned).
+	return nil
+}
+
+func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) error {
+	m.Lock()
+	// Get the CPUs assigned to the container during Allocate()
+	// (or fall back to the default CPUSet if none were assigned).
 	cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name)
 	m.Unlock()

 	if !cpus.IsEmpty() {
-		err = m.updateContainerCPUSet(containerID, cpus)
+		err := m.updateContainerCPUSet(containerID, cpus)
 		if err != nil {
 			klog.Errorf("[cpumanager] AddContainer error: error updating CPUSet for container (pod: %s, container: %s, container id: %s, err: %v)", p.Name, c.Name, containerID, err)
 			m.Lock()
-			err := m.policyRemoveContainerByID(containerID)
+			err := m.policyRemoveContainerByRef(string(p.UID), c.Name)
 			if err != nil {
 				klog.Errorf("[cpumanager] AddContainer rollback state error: %v", err)
 			}
@@ -246,6 +245,7 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
 		}
 		return err
 	}

 	klog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
 	return nil
 }
@@ -263,14 +263,6 @@ func (m *manager) RemoveContainer(containerID string) error {
 	return nil
 }

-func (m *manager) policyAddContainer(p *v1.Pod, c *v1.Container, containerID string) error {
-	err := m.policy.AddContainer(m.state, p, c)
-	if err == nil {
-		m.containerMap.Add(string(p.UID), c.Name, containerID)
-	}
-	return err
-}
-
 func (m *manager) policyRemoveContainerByID(containerID string) error {
 	podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
 	if err != nil {
@@ -104,7 +104,7 @@ func (p *mockPolicy) Start(s state.State) error {
 	return p.err
 }

-func (p *mockPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container) error {
+func (p *mockPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
 	return p.err
 }

@@ -227,14 +227,16 @@ func TestCPUManagerAdd(t *testing.T) {
 		updateErr error
 		policy    Policy
 		expCPUSet cpuset.CPUSet
-		expErr    error
+		expAllocateErr     error
+		expAddContainerErr error
 	}{
 		{
 			description: "cpu manager add - no error",
 			updateErr:   nil,
 			policy:      testPolicy,
 			expCPUSet:   cpuset.NewCPUSet(3, 4),
-			expErr:      nil,
+			expAllocateErr:     nil,
+			expAddContainerErr: nil,
 		},
 		{
 			description: "cpu manager add - policy add container error",
@@ -243,14 +245,16 @@ func TestCPUManagerAdd(t *testing.T) {
 				err: fmt.Errorf("fake reg error"),
 			},
 			expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
-			expErr:    fmt.Errorf("fake reg error"),
+			expAllocateErr:     fmt.Errorf("fake reg error"),
+			expAddContainerErr: nil,
 		},
 		{
 			description: "cpu manager add - container update error",
 			updateErr:   fmt.Errorf("fake update error"),
 			policy:      testPolicy,
 			expCPUSet:   cpuset.NewCPUSet(1, 2, 3, 4),
-			expErr:      fmt.Errorf("fake update error"),
+			expAllocateErr:     nil,
+			expAddContainerErr: fmt.Errorf("fake update error"),
 		},
 	}

@@ -271,10 +275,16 @@ func TestCPUManagerAdd(t *testing.T) {

 		pod := makePod("fakePod", "fakeContainer", "2", "2")
 		container := &pod.Spec.Containers[0]
-		err := mgr.AddContainer(pod, container, "fakeID")
-		if !reflect.DeepEqual(err, testCase.expErr) {
+		err := mgr.Allocate(pod, container)
+		if !reflect.DeepEqual(err, testCase.expAllocateErr) {
+			t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v",
+				testCase.description, testCase.expAllocateErr, err)
+		}
+
+		err = mgr.AddContainer(pod, container, "fakeID")
+		if !reflect.DeepEqual(err, testCase.expAddContainerErr) {
 			t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v",
-				testCase.description, testCase.expErr, err)
+				testCase.description, testCase.expAddContainerErr, err)
 		}
 		if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) {
 			t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v",
@@ -494,7 +504,12 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
 			testCase.expCSets...)

 		for i := range containers {
-			err := mgr.AddContainer(testCase.pod, &containers[i], containerIDs[i])
+			err := mgr.Allocate(testCase.pod, &containers[i])
+			if err != nil {
+				t.Errorf("StaticPolicy Allocate() error (%v). unexpected error for container id: %v: %v",
+					testCase.description, containerIDs[i], err)
+			}
+			err = mgr.AddContainer(testCase.pod, &containers[i], containerIDs[i])
 			if err != nil {
 				t.Errorf("StaticPolicy AddContainer() error (%v). unexpected error for container id: %v: %v",
 					testCase.description, containerIDs[i], err)
@@ -974,21 +989,24 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
 		updateErr error
 		policy    Policy
 		expCPUSet cpuset.CPUSet
-		expErr    error
+		expAllocateErr     error
+		expAddContainerErr error
 	}{
 		{
 			description: "cpu manager add - no error",
 			updateErr:   nil,
 			policy:      testPolicy,
 			expCPUSet:   cpuset.NewCPUSet(0, 3),
-			expErr:      nil,
+			expAllocateErr:     nil,
+			expAddContainerErr: nil,
 		},
 		{
 			description: "cpu manager add - container update error",
 			updateErr:   fmt.Errorf("fake update error"),
 			policy:      testPolicy,
 			expCPUSet:   cpuset.NewCPUSet(0, 1, 2, 3),
-			expErr:      fmt.Errorf("fake update error"),
+			expAllocateErr:     nil,
+			expAddContainerErr: fmt.Errorf("fake update error"),
 		},
 	}

@@ -1009,10 +1027,16 @@ func TestCPUManagerAddWithResvList(t *testing.T) {

 		pod := makePod("fakePod", "fakeContainer", "2", "2")
 		container := &pod.Spec.Containers[0]
-		err := mgr.AddContainer(pod, container, "fakeID")
-		if !reflect.DeepEqual(err, testCase.expErr) {
+		err := mgr.Allocate(pod, container)
+		if !reflect.DeepEqual(err, testCase.expAllocateErr) {
+			t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v",
+				testCase.description, testCase.expAllocateErr, err)
+		}
+
+		err = mgr.AddContainer(pod, container, "fakeID")
+		if !reflect.DeepEqual(err, testCase.expAddContainerErr) {
 			t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v",
-				testCase.description, testCase.expErr, err)
+				testCase.description, testCase.expAddContainerErr, err)
 		}
 		if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) {
 			t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v",
@@ -40,6 +40,11 @@ func (m *fakeManager) Policy() Policy {
 	return NewNonePolicy()
 }

+func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error {
+	klog.Infof("[fake cpumanager] Allocate (pod: %s, container: %s", pod.Name, container.Name)
+	return nil
+}
+
 func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
 	klog.Infof("[fake cpumanager] AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
 	return nil
@@ -26,8 +26,8 @@ import (
 type Policy interface {
 	Name() string
 	Start(s state.State) error
-	// AddContainer call is idempotent
-	AddContainer(s state.State, pod *v1.Pod, container *v1.Container) error
+	// Allocate call is idempotent
+	Allocate(s state.State, pod *v1.Pod, container *v1.Container) error
 	// RemoveContainer call is idempotent
 	RemoveContainer(s state.State, podUID string, containerName string) error
 	// GetTopologyHints implements the topologymanager.HintProvider Interface
@@ -44,7 +44,7 @@ func (p *nonePolicy) Start(s state.State) error {
 	return nil
 }

-func (p *nonePolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container) error {
+func (p *nonePolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
 	return nil
 }

@@ -33,7 +33,7 @@ func TestNonePolicyName(t *testing.T) {
 	}
 }

-func TestNonePolicyAdd(t *testing.T) {
+func TestNonePolicyAllocate(t *testing.T) {
 	policy := &nonePolicy{}

 	st := &mockState{
@@ -44,9 +44,9 @@ func TestNonePolicyAdd(t *testing.T) {
 	testPod := makePod("fakePod", "fakeContainer", "1000m", "1000m")

 	container := &testPod.Spec.Containers[0]
-	err := policy.AddContainer(st, testPod, container)
+	err := policy.Allocate(st, testPod, container)
 	if err != nil {
-		t.Errorf("NonePolicy AddContainer() error. expected no error but got: %v", err)
+		t.Errorf("NonePolicy Allocate() error. expected no error but got: %v", err)
 	}
 }

@@ -188,9 +188,9 @@ func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
 	return s.GetDefaultCPUSet().Difference(p.reserved)
 }

-func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container) error {
+func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
 	if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
-		klog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s)", pod.Name, container.Name)
+		klog.Infof("[cpumanager] static policy: Allocate (pod: %s, container: %s)", pod.Name, container.Name)
 		// container belongs in an exclusively allocated pool

 		if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
@@ -209,6 +209,17 @@ func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Co
 			return err
 		}
 		s.SetCPUSet(string(pod.UID), container.Name, cpuset)
+
+		// Check if the container that has just been allocated resources is an init container.
+		// If so, release its CPUs back into the shared pool so they can be reallocated.
+		for _, initContainer := range pod.Spec.InitContainers {
+			if container.Name == initContainer.Name {
+				if toRelease, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
+					// Mutate the shared pool, adding released cpus.
+					s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
+				}
+			}
+		}
 	}
 	// container belongs in the shared pool (nothing to do; use default cpuset)
 	return nil
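The block added above returns an init container's exclusively assigned CPUs to the shared (default) pool right after allocation, while leaving the per-container assignment recorded in state, so the same CPUs can be handed to the containers that follow. Below is a small sketch of the underlying cpuset arithmetic using the cpuset package already imported in this diff; the concrete CPU IDs are made up for illustration:

package example

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
)

func main() {
	// Shared pool while the init container holds CPUs 2 and 3 exclusively.
	shared := cpuset.NewCPUSet(0, 1)
	// CPUs being released back by the init container.
	toRelease := cpuset.NewCPUSet(2, 3)

	// Same operation as s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease)):
	// the released CPUs rejoin the shared pool for later containers to claim.
	shared = shared.Union(toRelease)
	fmt.Println(shared) // prints the merged set, 0-3
}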
@@ -444,26 +444,26 @@ func TestStaticPolicyAdd(t *testing.T) {
 		}

 		container := &testCase.pod.Spec.Containers[0]
-		err := policy.AddContainer(st, testCase.pod, container)
+		err := policy.Allocate(st, testCase.pod, container)
 		if !reflect.DeepEqual(err, testCase.expErr) {
-			t.Errorf("StaticPolicy AddContainer() error (%v). expected add error: %v but got: %v",
+			t.Errorf("StaticPolicy Allocate() error (%v). expected add error: %v but got: %v",
 				testCase.description, testCase.expErr, err)
 		}

 		if testCase.expCPUAlloc {
 			cset, found := st.assignments[string(testCase.pod.UID)][container.Name]
 			if !found {
-				t.Errorf("StaticPolicy AddContainer() error (%v). expected container %v to be present in assignments %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v",
 					testCase.description, container.Name, st.assignments)
 			}

 			if !reflect.DeepEqual(cset, testCase.expCSet) {
-				t.Errorf("StaticPolicy AddContainer() error (%v). expected cpuset %v but got %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %v but got %v",
 					testCase.description, testCase.expCSet, cset)
 			}

 			if !cset.Intersection(st.defaultCPUSet).IsEmpty() {
-				t.Errorf("StaticPolicy AddContainer() error (%v). expected cpuset %v to be disoint from the shared cpuset %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %v to be disoint from the shared cpuset %v",
 					testCase.description, cset, st.defaultCPUSet)
 			}
 		}
@@ -471,7 +471,7 @@ func TestStaticPolicyAdd(t *testing.T) {
 		if !testCase.expCPUAlloc {
 			_, found := st.assignments[string(testCase.pod.UID)][container.Name]
 			if found {
-				t.Errorf("StaticPolicy AddContainer() error (%v). Did not expect container %v to be present in assignments %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). Did not expect container %v to be present in assignments %v",
 					testCase.description, container.Name, st.assignments)
 			}
 		}
@@ -786,26 +786,26 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
 		}

 		container := &testCase.pod.Spec.Containers[0]
-		err := policy.AddContainer(st, testCase.pod, container)
+		err := policy.Allocate(st, testCase.pod, container)
 		if !reflect.DeepEqual(err, testCase.expErr) {
-			t.Errorf("StaticPolicy AddContainer() error (%v). expected add error: %v but got: %v",
+			t.Errorf("StaticPolicy Allocate() error (%v). expected add error: %v but got: %v",
 				testCase.description, testCase.expErr, err)
 		}

 		if testCase.expCPUAlloc {
 			cset, found := st.assignments[string(testCase.pod.UID)][container.Name]
 			if !found {
-				t.Errorf("StaticPolicy AddContainer() error (%v). expected container %v to be present in assignments %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v",
 					testCase.description, container.Name, st.assignments)
 			}

 			if !reflect.DeepEqual(cset, testCase.expCSet) {
-				t.Errorf("StaticPolicy AddContainer() error (%v). expected cpuset %v but got %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %v but got %v",
 					testCase.description, testCase.expCSet, cset)
 			}

 			if !cset.Intersection(st.defaultCPUSet).IsEmpty() {
-				t.Errorf("StaticPolicy AddContainer() error (%v). expected cpuset %v to be disoint from the shared cpuset %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %v to be disoint from the shared cpuset %v",
 					testCase.description, cset, st.defaultCPUSet)
 			}
 		}
@@ -813,7 +813,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
 		if !testCase.expCPUAlloc {
 			_, found := st.assignments[string(testCase.pod.UID)][container.Name]
 			if found {
-				t.Errorf("StaticPolicy AddContainer() error (%v). Did not expect container %v to be present in assignments %v",
+				t.Errorf("StaticPolicy Allocate() error (%v). Did not expect container %v to be present in assignments %v",
 					testCase.description, container.Name, st.assignments)
 			}
 		}
@@ -105,6 +105,10 @@ type ManagerImpl struct {

 	// Store of Topology Affinties that the Device Manager can query.
 	topologyAffinityStore topologymanager.Store
+
+	// devicesToReuse contains devices that can be reused as they have been allocated to
+	// init containers.
+	devicesToReuse PodReusableDevices
 }

 type endpointInfo struct {
@@ -114,6 +118,9 @@ type endpointInfo struct {

 type sourcesReadyStub struct{}

+// PodReusableDevices is a map by pod name of devices to reuse.
+type PodReusableDevices map[string]map[string]sets.String
+
 func (s *sourcesReadyStub) AddSource(source string) {}
 func (s *sourcesReadyStub) AllReady() bool { return true }

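PodReusableDevices is keyed by pod UID and then by resource name, holding device IDs that init containers are done with and that later containers of the same pod may reuse; the Allocate() change further down populates and prunes it. Here is a small sketch of the map's shape; the UID and device identifiers echo the tests in this diff and are otherwise illustrative:

package example

import "k8s.io/apimachinery/pkg/util/sets"

// Mirrors the type added in this commit:
// pod UID -> resource name -> set of device IDs available for reuse.
type PodReusableDevices map[string]map[string]sets.String

func main() {
	devicesToReuse := make(PodReusableDevices)

	podUID := "testPod" // illustrative UID
	devicesToReuse[podUID] = map[string]sets.String{
		"domain1.com/resource1": sets.NewString("dev1"),
	}

	// An app container in the same pod can pick up "dev1"; entries for other
	// pods are dropped when Allocate() is next called for a different pod.
	_ = devicesToReuse[podUID]["domain1.com/resource1"].Has("dev1") // true
}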
@@ -147,6 +154,7 @@ func newManagerImpl(socketPath string, numaNodeInfo cputopology.NUMANodeInfo, to
 		podDevices:            make(podDevices),
 		numaNodes:             numaNodes,
 		topologyAffinityStore: topologyAffinityStore,
+		devicesToReuse:        make(PodReusableDevices),
 	}
 	manager.callback = manager.genericDeviceUpdateCallback

@@ -350,32 +358,41 @@ func (m *ManagerImpl) isVersionCompatibleWithPlugin(versions []string) bool {
 	return false
 }

-func (m *ManagerImpl) allocatePodResources(pod *v1.Pod) error {
-	devicesToReuse := make(map[string]sets.String)
-	for _, container := range pod.Spec.InitContainers {
-		if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
-			return err
-		}
-		m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
-	}
-	for _, container := range pod.Spec.Containers {
-		if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
-			return err
-		}
-		m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
-	}
-	return nil
-}
-
 // Allocate is the call that you can use to allocate a set of devices
 // from the registered device plugins.
-func (m *ManagerImpl) Allocate(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
-	pod := attrs.Pod
-	err := m.allocatePodResources(pod)
-	if err != nil {
-		klog.Errorf("Failed to allocate device plugin resource for pod %s: %v", string(pod.UID), err)
+func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error {
+	if _, ok := m.devicesToReuse[string(pod.UID)]; !ok {
+		m.devicesToReuse[string(pod.UID)] = make(map[string]sets.String)
+	}
+	// If pod entries to m.devicesToReuse other than the current pod exist, delete them.
+	for podUID := range m.devicesToReuse {
+		if podUID != string(pod.UID) {
+			delete(m.devicesToReuse, podUID)
+		}
+	}
+	// Allocate resources for init containers first as we know the caller always loops
+	// through init containers before looping through app containers. Should the caller
+	// ever change those semantics, this logic will need to be amended.
+	for _, initContainer := range pod.Spec.InitContainers {
+		if container.Name == initContainer.Name {
+			if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
 				return err
 			}
+			m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
+			return nil
+		}
+	}
+	if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
+		return err
+	}
+	m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
+	return nil
+}
+
+// UpdatePluginResources updates node resources based on devices already allocated to pods.
+func (m *ManagerImpl) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
+	pod := attrs.Pod

 	m.mutex.Lock()
 	defer m.mutex.Unlock()
@@ -860,8 +877,8 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
 		}
 	}
 	if needsReAllocate {
-		klog.V(2).Infof("needs re-allocate device plugin resources for pod %s", podUID)
-		if err := m.allocatePodResources(pod); err != nil {
+		klog.V(2).Infof("needs re-allocate device plugin resources for pod %s, container %s", podUID, container.Name)
+		if err := m.Allocate(pod, container); err != nil {
 			return nil, err
 		}
 	}
@@ -45,7 +45,12 @@ func (h *ManagerStub) Stop() error {
 }

 // Allocate simply returns nil.
-func (h *ManagerStub) Allocate(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
+func (h *ManagerStub) Allocate(pod *v1.Pod, container *v1.Container) error {
+	return nil
+}
+
+// UpdatePluginResources simply returns nil.
+func (h *ManagerStub) UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
 	return nil
 }

@@ -30,6 +30,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -604,6 +605,7 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
 		allocatedDevices:      make(map[string]sets.String),
 		endpoints:             make(map[string]endpointInfo),
 		podDevices:            make(podDevices),
+		devicesToReuse:        make(PodReusableDevices),
 		topologyAffinityStore: topologymanager.NewFakeManager(),
 		activePods:            activePods,
 		sourcesReady:          &sourcesReadyStub{},
@@ -648,17 +650,6 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
 	return testManager, nil
 }

-func getTestNodeInfo(allocatable v1.ResourceList) *schedulernodeinfo.NodeInfo {
-	cachedNode := &v1.Node{
-		Status: v1.NodeStatus{
-			Allocatable: allocatable,
-		},
-	}
-	nodeInfo := &schedulernodeinfo.NodeInfo{}
-	nodeInfo.SetNode(cachedNode)
-	return nodeInfo
-}
-
 type TestResource struct {
 	resourceName     string
 	resourceQuantity resource.Quantity
@@ -686,7 +677,6 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
 	tmpDir, err := ioutil.TempDir("", "checkpoint")
 	as.Nil(err)
 	defer os.RemoveAll(tmpDir)
-	nodeInfo := getTestNodeInfo(v1.ResourceList{})
 	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
 	as.Nil(err)

@@ -738,7 +728,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
 		pod := testCase.testPod
 		activePods = append(activePods, pod)
 		podsStub.updateActivePods(activePods)
-		err := testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})
+		err := testManager.Allocate(pod, &pod.Spec.Containers[0])
 		if !reflect.DeepEqual(err, testCase.expErr) {
 			t.Errorf("DevicePluginManager error (%v). expected error: %v but got: %v",
 				testCase.description, testCase.expErr, err)
@@ -780,7 +770,6 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
 	podsStub := activePodsStub{
 		activePods: []*v1.Pod{},
 	}
-	nodeInfo := getTestNodeInfo(v1.ResourceList{})
 	tmpDir, err := ioutil.TempDir("", "checkpoint")
 	as.Nil(err)
 	defer os.RemoveAll(tmpDir)
@@ -834,7 +823,12 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
 		},
 	}
 	podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInInitContainers})
-	err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: podWithPluginResourcesInInitContainers})
+	for _, container := range podWithPluginResourcesInInitContainers.Spec.InitContainers {
+		err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container)
+	}
+	for _, container := range podWithPluginResourcesInInitContainers.Spec.Containers {
+		err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container)
+	}
 	as.Nil(err)
 	podUID := string(podWithPluginResourcesInInitContainers.UID)
 	initCont1 := podWithPluginResourcesInInitContainers.Spec.InitContainers[0].Name
@@ -855,7 +849,10 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
 	as.Equal(0, normalCont1Devices.Intersection(normalCont2Devices).Len())
 }

-func TestSanitizeNodeAllocatable(t *testing.T) {
+func TestUpdatePluginResources(t *testing.T) {
+	pod := &v1.Pod{}
+	pod.UID = types.UID("testPod")
+
 	resourceName1 := "domain1.com/resource1"
 	devID1 := "dev1"

@@ -876,6 +873,8 @@ func TestSanitizeNodeAllocatable(t *testing.T) {
 		podDevices:        make(podDevices),
 		checkpointManager: ckm,
 	}
+	testManager.podDevices[string(pod.UID)] = make(containerDevices)
+
 	// require one of resource1 and one of resource2
 	testManager.allocatedDevices[resourceName1] = sets.NewString()
 	testManager.allocatedDevices[resourceName1].Insert(devID1)
@@ -893,7 +892,7 @@ func TestSanitizeNodeAllocatable(t *testing.T) {
 	nodeInfo := &schedulernodeinfo.NodeInfo{}
 	nodeInfo.SetNode(cachedNode)

-	testManager.sanitizeNodeAllocatable(nodeInfo)
+	testManager.UpdatePluginResources(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})

 	allocatableScalarResources := nodeInfo.AllocatableResource().ScalarResources
 	// allocatable in nodeInfo is less than needed, should update
@@ -918,7 +917,6 @@ func TestDevicePreStartContainer(t *testing.T) {
 	tmpDir, err := ioutil.TempDir("", "checkpoint")
 	as.Nil(err)
 	defer os.RemoveAll(tmpDir)
-	nodeInfo := getTestNodeInfo(v1.ResourceList{})

 	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1})
 	as.Nil(err)
@@ -936,7 +934,7 @@ func TestDevicePreStartContainer(t *testing.T) {
 	activePods := []*v1.Pod{}
 	activePods = append(activePods, pod)
 	podsStub.updateActivePods(activePods)
-	err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})
+	err = testManager.Allocate(pod, &pod.Spec.Containers[0])
 	as.Nil(err)
 	runContainerOpts, err := testManager.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])
 	as.Nil(err)
@@ -34,15 +34,17 @@ type Manager interface {
 	// Start starts device plugin registration service.
 	Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error

-	// Allocate configures and assigns devices to pods. The pods are provided
-	// through the pod admission attributes in the attrs argument. From the
-	// requested device resources, Allocate will communicate with the owning
-	// device plugin to allow setup procedures to take place, and for the
-	// device plugin to provide runtime settings to use the device (environment
-	// variables, mount points and device files). The node object is provided
-	// for the device manager to update the node capacity to reflect the
-	// currently available devices.
-	Allocate(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error
+	// Allocate configures and assigns devices to a container in a pod. From
+	// the requested device resources, Allocate will communicate with the
+	// owning device plugin to allow setup procedures to take place, and for
+	// the device plugin to provide runtime settings to use the device
+	// (environment variables, mount points and device files).
+	Allocate(pod *v1.Pod, container *v1.Container) error
+
+	// UpdatePluginResources updates node resources based on devices already
+	// allocated to pods. The node object is provided for the device manager to
+	// update the node capacity to reflect the currently available devices.
+	UpdatePluginResources(node *schedulernodeinfo.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error

 	// Stop stops the manager.
 	Stop() error
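The old single Allocate(node, attrs) entry point is split in two: Allocate now works per container at admission time, while UpdatePluginResources keeps the node-capacity bookkeeping that previously rode along with allocation. A rough sketch of a caller exercising both, mirroring the calls used in the updated tests; the function name and import paths are assumed from the package aliases used in this diff rather than taken from the commit itself:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

// admitPodDevices walks init containers first and app containers second,
// matching the ordering the new Allocate() implementation relies on, then
// updates node-level resources through the unchanged admit-attributes path.
func admitPodDevices(devMgr devicemanager.Manager, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) error {
	for i := range pod.Spec.InitContainers {
		if err := devMgr.Allocate(pod, &pod.Spec.InitContainers[i]); err != nil {
			return err
		}
	}
	for i := range pod.Spec.Containers {
		if err := devMgr.Allocate(pod, &pod.Spec.Containers[i]); err != nil {
			return err
		}
	}
	return devMgr.UpdatePluginResources(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})
}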
@@ -77,6 +77,10 @@ type HintProvider interface {
 	// a consensus "best" hint. The hint providers may subsequently query the
 	// topology manager to influence actual resource assignment.
 	GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint
+	// Allocate triggers resource allocation to occur on the HintProvider after
+	// all hints have been gathered and the aggregated Hint is available via a
+	// call to Store.GetAffinity().
+	Allocate(pod *v1.Pod, container *v1.Container) error
 }

 //Store interface is to allow Hint Providers to retrieve pod affinity
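With this addition, every hint provider must implement Allocate() alongside GetTopologyHints(); the mockHintProvider in the test hunk near the end of this diff is the minimal version. Below is a hedged sketch of a no-op provider satisfying the extended interface; the type name is invented for illustration:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
)

// noopHintProvider offers no hints and performs no allocation.
type noopHintProvider struct{}

// GetTopologyHints returns nil, leaving the affinity decision to other providers.
func (p *noopHintProvider) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
	return nil
}

// Allocate runs after the aggregated hint is available via the Store; a real
// provider would pin its resources to that affinity here.
func (p *noopHintProvider) Allocate(pod *v1.Pod, container *v1.Container) error {
	return nil
}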
@@ -176,6 +180,16 @@ func (m *manager) accumulateProvidersHints(pod *v1.Pod, container *v1.Container)
 	return providersHints
 }

+func (m *manager) allocateAlignedResources(pod *v1.Pod, container *v1.Container) error {
+	for _, provider := range m.hintProviders {
+		err := provider.Allocate(pod, container)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 // Collect Hints from hint providers and pass to policy to retrieve the best one.
 func (m *manager) calculateAffinity(pod *v1.Pod, container *v1.Container) (TopologyHint, bool) {
 	providersHints := m.accumulateProvidersHints(pod, container)
@@ -216,7 +230,6 @@ func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR

 	klog.Infof("[topologymanager] Topology Admit Handler")
 	pod := attrs.Pod
-	hints := make(map[string]TopologyHint)

 	for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
 		result, admit := m.calculateAffinity(pod, &container)
@@ -227,11 +240,22 @@ func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR
 				Admit:   false,
 			}
 		}
-		hints[container.Name] = result
-	}

-	m.podTopologyHints[string(pod.UID)] = hints
-	klog.Infof("[topologymanager] Topology Affinity for Pod: %v are %v", pod.UID, m.podTopologyHints[string(pod.UID)])
+		klog.Infof("[topologymanager] Topology Affinity for (pod: %v container: %v): %v", pod.UID, container.Name, result)
+		if m.podTopologyHints[string(pod.UID)] == nil {
+			m.podTopologyHints[string(pod.UID)] = make(map[string]TopologyHint)
+		}
+		m.podTopologyHints[string(pod.UID)][container.Name] = result
+
+		err := m.allocateAlignedResources(pod, &container)
+		if err != nil {
+			return lifecycle.PodAdmitResult{
+				Message: fmt.Sprintf("Allocate failed due to %v, which is unexpected", err),
+				Reason:  "UnexpectedAdmissionError",
+				Admit:   false,
+			}
+		}
+	}

 	return lifecycle.PodAdmitResult{Admit: true}
 }
@@ -75,12 +75,20 @@ func TestNewManager(t *testing.T) {

 type mockHintProvider struct {
 	th map[string][]TopologyHint
+	//TODO: Add this field and add some tests to make sure things error out
+	//appropriately on allocation errors.
+	//allocateError error
 }

 func (m *mockHintProvider) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint {
 	return m.th
 }

+func (m *mockHintProvider) Allocate(pod *v1.Pod, container *v1.Container) error {
+	//return allocateError
+	return nil
+}
+
 func TestGetAffinity(t *testing.T) {
 	tcases := []struct {
 		name string
@@ -867,9 +867,9 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 	}
 	klet.AddPodSyncLoopHandler(activeDeadlineHandler)
 	klet.AddPodSyncHandler(activeDeadlineHandler)
-	if utilfeature.DefaultFeatureGate.Enabled(features.TopologyManager) {
-		klet.admitHandlers.AddPodAdmitHandler(klet.containerManager.GetTopologyPodAdmitHandler())
-	}
+	klet.admitHandlers.AddPodAdmitHandler(klet.containerManager.GetAllocateResourcesPodAdmitHandler())
 	criticalPodAdmissionHandler := preemption.NewCriticalPodAdmissionHandler(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeDeps.Recorder)
 	klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay, criticalPodAdmissionHandler, klet.containerManager.UpdatePluginResources))
 	// apply functional Option's