Ensure CPUManager TopologyHints are regenerated after kubelet restart

This patch also includes tests to make sure the newly added logic works
as expected.
Kevin Klues
2019-11-05 13:09:49 +00:00
parent a338c8f7fd
commit 9dc116eb08
3 changed files with 118 additions and 1 deletion
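For readers skimming the diff, here is a toy illustration of the failure mode being fixed (illustrative names only, not kubelet code): after a restart, the checkpointed state still records the CPUs a running container owns, but those CPUs are absent from the free pool, so hints computed from the free pool alone can come up empty for that container and block re-admission.

package main

import "fmt"

// Toy model: CPUs 0-3 sit on NUMA node 0, CPUs 4-7 on node 1.
func numaNodes(cpus []int) map[int]bool {
    nodes := map[int]bool{}
    for _, c := range cpus {
        nodes[c/4] = true
    }
    return nodes
}

// hintFor reports which NUMA nodes a request would span, or nil when the
// pool cannot satisfy the request at all.
func hintFor(pool []int, requested int) map[int]bool {
    if len(pool) < requested {
        return nil
    }
    return numaNodes(pool[:requested])
}

func main() {
    allocated := []int{0, 1, 2, 3} // restored from state after kubelet restart
    free := []int{6, 7}            // the remainder is held by other containers

    // Hints computed from the free pool fail for the running container...
    fmt.Println(hintFor(free, 4)) // map[] (nil): no placement possible
    // ...while regenerating from its existing allocation keeps it on node 0.
    fmt.Println(hintFor(allocated, 4)) // map[0:true]
}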


@@ -320,6 +320,23 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod v1.Pod, container v1.
        return nil
    }

    // Short circuit to regenerate the same hints if there are already
    // guaranteed CPUs allocated to the Container. This might happen after a
    // kubelet restart, for example.
    containerID, _ := findContainerIDByName(&pod.Status, container.Name)
    if allocated, exists := s.GetCPUSet(containerID); exists {
        if allocated.Size() != requested {
            klog.Errorf("[cpumanager] CPUs already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", string(pod.UID), container.Name, requested, allocated.Size())
            return map[string][]topologymanager.TopologyHint{
                string(v1.ResourceCPU): {},
            }
        }
        klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name)
        return map[string][]topologymanager.TopologyHint{
            string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested),
        }
    }

    // Get a list of available CPUs.
    available := p.assignableCPUs(s)
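Two return paths are added above, and they mean different things to the Topology Manager: a map entry holding an empty hint slice signals that no placement can satisfy the request (strict policies then reject the pod), while returning no entry at all signals no topology preference. The sketch below is a simplified, assumed model of that distinction, not code from this commit:

package main

import "fmt"

// interpret mirrors, in simplified form, what each shape of the returned
// hints map conveys to the Topology Manager (an assumption of this sketch).
func interpret(hints []string, present bool) string {
    switch {
    case !present:
        return "no entry: no topology preference for this resource"
    case len(hints) == 0:
        return "empty list: request cannot be satisfied; strict policies reject"
    default:
        return fmt.Sprintf("%d candidate affinities to merge", len(hints))
    }
}

func main() {
    // mismatch path from the diff: entry present but empty
    fmt.Println(interpret([]string{}, true))
    // regeneration path: concrete hints for the merge step
    fmt.Println(interpret([]string{"node0", "node0|node1"}, true))
    // requested == 0 path: nil map, no entry at all
    fmt.Println(interpret(nil, false))
}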


@@ -213,6 +213,20 @@ func TestStaticPolicyAdd(t *testing.T) {
            expCPUAlloc: true,
            expCSet:     cpuset.NewCPUSet(1, 5),
        },
        {
            description:     "GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation",
            topo:            topoSingleSocketHT,
            numReservedCPUs: 1,
            containerID:     "fakeID3",
            stAssignments: state.ContainerCPUAssignments{
                "fakeID3": cpuset.NewCPUSet(2, 3, 6, 7),
            },
            stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
            pod:             makePod("4000m", "4000m"),
            expErr:          nil,
            expCPUAlloc:     true,
            expCSet:         cpuset.NewCPUSet(2, 3, 6, 7),
        },
        {
            description: "GuPodMultipleCores, DualSocketHT, ExpectAllocOneSocket",
            topo:        topoDualSocketHT,

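The ExpectSameAllocation case above pins down the allocation-side half of the same guarantee: when the restored state already holds an assignment for a container ("fakeID3" owning CPUs 2, 3, 6, 7), the policy must hand back exactly that set rather than carving a new one out of the default pool. A toy sketch of the property, with illustrative names that are not the kubelet's actual types:

package main

import "fmt"

// allocator models the property ExpectSameAllocation checks: an existing
// assignment is returned unchanged instead of being re-planned.
type allocator struct {
    assignments map[string][]int
    defaultPool []int
}

func (a *allocator) allocate(containerID string, n int) []int {
    if cpus, ok := a.assignments[containerID]; ok {
        return cpus // same allocation as before the restart
    }
    cpus := a.defaultPool[:n]
    a.defaultPool = a.defaultPool[n:]
    a.assignments[containerID] = cpus
    return cpus
}

func main() {
    a := &allocator{
        assignments: map[string][]int{"fakeID3": {2, 3, 6, 7}},
        defaultPool: []int{0, 1, 4, 5},
    }
    fmt.Println(a.allocate("fakeID3", 4)) // [2 3 6 7], not taken from the pool
}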

@@ -23,6 +23,7 @@ import (
    cadvisorapi "github.com/google/cadvisor/info/v1"
    v1 "k8s.io/api/core/v1"
    "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
    "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
    "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
    "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@@ -72,6 +73,7 @@ func TestGetTopologyHints(t *testing.T) {
        name          string
        pod           v1.Pod
        container     v1.Container
        assignments   state.ContainerCPUAssignments
        defaultCPUSet cpuset.CPUSet
        expectedHints []topologymanager.TopologyHint
    }{
@@ -142,6 +144,86 @@ func TestGetTopologyHints(t *testing.T) {
                },
            },
        },
        {
            name:          "Request more CPUs than available",
            pod:           *testPod2,
            container:     *testContainer2,
            defaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3),
            expectedHints: nil,
        },
        {
            name:      "Regenerate Single-Node NUMA Hints if already allocated 1/2",
            pod:       *testPod1,
            container: *testContainer1,
            assignments: state.ContainerCPUAssignments{
                "": cpuset.NewCPUSet(0, 6),
            },
            defaultCPUSet: cpuset.NewCPUSet(),
            expectedHints: []topologymanager.TopologyHint{
                {
                    NUMANodeAffinity: firstSocketMask,
                    Preferred:        true,
                },
                {
                    NUMANodeAffinity: crossSocketMask,
                    Preferred:        false,
                },
            },
        },
        {
            name:      "Regenerate Single-Node NUMA Hints if already allocated 2/2",
            pod:       *testPod1,
            container: *testContainer1,
            assignments: state.ContainerCPUAssignments{
                "": cpuset.NewCPUSet(3, 9),
            },
            defaultCPUSet: cpuset.NewCPUSet(),
            expectedHints: []topologymanager.TopologyHint{
                {
                    NUMANodeAffinity: secondSocketMask,
                    Preferred:        true,
                },
                {
                    NUMANodeAffinity: crossSocketMask,
                    Preferred:        false,
                },
            },
        },
        {
            name:      "Regenerate Cross-NUMA Hints if already allocated",
            pod:       *testPod4,
            container: *testContainer4,
            assignments: state.ContainerCPUAssignments{
                "": cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
            },
            defaultCPUSet: cpuset.NewCPUSet(),
            expectedHints: []topologymanager.TopologyHint{
                {
                    NUMANodeAffinity: crossSocketMask,
                    Preferred:        true,
                },
            },
        },
        {
            name:      "Requested less than already allocated",
            pod:       *testPod1,
            container: *testContainer1,
            assignments: state.ContainerCPUAssignments{
                "": cpuset.NewCPUSet(0, 6, 3, 9),
            },
            defaultCPUSet: cpuset.NewCPUSet(),
            expectedHints: []topologymanager.TopologyHint{},
        },
        {
            name:      "Requested more than already allocated",
            pod:       *testPod4,
            container: *testContainer4,
            assignments: state.ContainerCPUAssignments{
                "": cpuset.NewCPUSet(0, 6, 3, 9),
            },
            defaultCPUSet: cpuset.NewCPUSet(),
            expectedHints: []topologymanager.TopologyHint{},
        },
    }
    for _, tc := range tcases {
        topology, _ := topology.Discover(&machineInfo, numaNodeInfo)
@@ -151,9 +233,13 @@ func TestGetTopologyHints(t *testing.T) {
                topology: topology,
            },
            state: &mockState{
                assignments:   tc.assignments,
                defaultCPUSet: tc.defaultCPUSet,
            },
            topology:          topology,
            activePods:        func() []*v1.Pod { return nil },
            podStatusProvider: mockPodStatusProvider{},
            sourcesReady:      &sourcesReadyStub{},
        }
        hints := m.GetTopologyHints(tc.pod, tc.container)[string(v1.ResourceCPU)]
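The masks and stubs these test cases rely on (firstSocketMask, secondSocketMask, crossSocketMask, mockPodStatusProvider, sourcesReadyStub) are defined elsewhere in the test file and do not appear in this diff. A plausible reconstruction, under the socketmask API of roughly this era (later renamed to bitmask), is sketched below; treat it as an assumption rather than the commit's actual helpers:

package cpumanager

import (
    "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/socketmask"
)

// Assumed definitions for the masks referenced in the cases above.
var (
    firstSocketMask, _  = socketmask.NewSocketMask(0)    // NUMA node 0 only
    secondSocketMask, _ = socketmask.NewSocketMask(1)    // NUMA node 1 only
    crossSocketMask, _  = socketmask.NewSocketMask(0, 1) // spans both nodes
)

// Assumed shape of the stub wired into the manager above: it reports all pod
// sources ready so hint regeneration for running containers is exercised.
type sourcesReadyStub struct{}

func (s *sourcesReadyStub) AddSource(source string) {}
func (s *sourcesReadyStub) AllReady() bool          { return true }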