memory manager: remove init containers once app container started

Remove init containers from the state file once the app container has started.
This releases the memory allocated for the init containers and can increase the
density of containers on the NUMA node in cases where the memory allocated for
init containers is bigger than the memory allocated for app containers.

Signed-off-by: Artyom Lukianov <alukiano@redhat.com>
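
For illustration only (not part of this commit), here is a hypothetical pod of
the shape the message describes, where the init container's guaranteed memory
exceeds the app container's. Under the static policy the app container re-uses
part of the init container's blocks (see the policy_static.go hunk below), and
this change frees the remainder once the app container starts:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// examplePod is hypothetical: "heavy-init" pins 4Gi on a NUMA node while the
// long-running "app" container only needs 1Gi. Without this change, the 3Gi
// that "app" cannot re-use would stay reserved in the memory manager state
// for the pod's whole lifetime. CPU limits are set so the pod is Guaranteed
// QoS, which the memory manager's static policy requires.
var examplePod = &v1.Pod{
	Spec: v1.PodSpec{
		InitContainers: []v1.Container{{
			Name: "heavy-init",
			Resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse("1"),
					v1.ResourceMemory: resource.MustParse("4Gi"),
				},
			},
		}},
		Containers: []v1.Container{{
			Name: "app",
			Resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse("1"),
					v1.ResourceMemory: resource.MustParse("1Gi"),
				},
			},
		}},
	},
}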
@@ -193,6 +193,19 @@ func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID
defer m.Unlock()
m.containerMap.Add(string(pod.UID), container.Name, containerID)
// Since we know that each init container always runs to completion before
// the next container starts, we can safely remove references to any previously
// started init containers. This will free up the memory from these init containers
// for use in other pods. If the current container happens to be an init container,
// we skip deletion of it until the next container is added, and this is called again.
for _, initContainer := range pod.Spec.InitContainers {
if initContainer.Name == container.Name {
break
}
m.policyRemoveContainerByRef(string(pod.UID), initContainer.Name)
}
}
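
A minimal sketch (hypothetical helper, not in the commit; v1 here is
k8s.io/api/core/v1) of what the loop above removes depending on which
container is being added:

// removedInits mirrors the break-on-self logic of AddContainer: it lists the
// init containers whose state would be dropped when `added` starts.
func removedInits(pod *v1.Pod, added string) []string {
	var removed []string
	for _, ic := range pod.Spec.InitContainers {
		if ic.Name == added {
			break // the container being added is never removed here
		}
		removed = append(removed, ic.Name)
	}
	return removed
}

// With init containers "init-1", "init-2" and app container "app":
//   removedInits(pod, "init-2") == []string{"init-1"}
//   removedInits(pod, "app")    == []string{"init-1", "init-2"}
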
// GetMemoryNUMANodes provides NUMA nodes that are used to allocate the container memory
@@ -243,11 +256,7 @@ func (m *manager) RemoveContainer(containerID string) error {
return nil
}
err = m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.ErrorS(err, "RemoveContainer error")
return err
}
m.policyRemoveContainerByRef(podUID, containerName)
return nil
}
@@ -308,22 +317,15 @@ func (m *manager) removeStaleState() {
for containerName := range assignments[podUID] {
if _, ok := activeContainers[podUID][containerName]; !ok {
klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.ErrorS(err, "RemoveStaleState: failed to remove state", "podUID", podUID, "containerName", containerName)
}
m.policyRemoveContainerByRef(podUID, containerName)
}
}
}
}
func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) error {
err := m.policy.RemoveContainer(m.state, podUID, containerName)
if err == nil {
m.containerMap.RemoveByContainerRef(podUID, containerName)
}
return err
func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) {
m.policy.RemoveContainer(m.state, podUID, containerName)
m.containerMap.RemoveByContainerRef(podUID, containerName)
}
func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (map[v1.ResourceName]resource.Quantity, error) {


@@ -100,8 +100,7 @@ func (p *mockPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Containe
return p.err
}
func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
return p.err
func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) {
}
func (p *mockPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
@@ -1695,154 +1694,6 @@ func TestRemoveContainer(t *testing.T) {
},
expectedError: nil,
},
{
description: "Should fail if policy returns an error",
removeContainerID: "fakeID1",
policyName: policyTypeMock,
machineInfo: machineInfo,
reserved: reserved,
assignments: state.ContainerMemoryAssignments{
"fakePod1": map[string][]state.Block{
"fakeContainer1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 1 * gb,
},
},
"fakeContainer2": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 1 * gb,
},
},
},
},
expectedAssignments: state.ContainerMemoryAssignments{
"fakePod1": map[string][]state.Block{
"fakeContainer1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 1 * gb,
},
},
"fakeContainer2": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 1 * gb,
},
},
},
},
machineState: state.NUMANodeMap{
0: &state.NUMANodeState{
Cells: []int{0},
NumberOfAssignments: 4,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 7 * gb,
Reserved: 2 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 3 * gb,
Reserved: 2 * gb,
SystemReserved: 0 * gb,
TotalMemSize: 5 * gb,
},
},
},
1: &state.NUMANodeState{
Cells: []int{1},
NumberOfAssignments: 0,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 9 * gb,
Reserved: 0 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 5 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 5 * gb,
},
},
},
},
expectedMachineState: state.NUMANodeMap{
0: &state.NUMANodeState{
Cells: []int{0},
NumberOfAssignments: 4,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 7 * gb,
Reserved: 2 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 3 * gb,
Reserved: 2 * gb,
SystemReserved: 0 * gb,
TotalMemSize: 5 * gb,
},
},
},
1: &state.NUMANodeState{
Cells: []int{1},
NumberOfAssignments: 0,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 9 * gb,
Reserved: 0 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 5 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 5 * gb,
},
},
},
},
expectedError: fmt.Errorf("fake reg error"),
},
{
description: "Should do nothing if container is not in containerMap",
removeContainerID: "fakeID3",


@@ -32,7 +32,7 @@ type Policy interface {
// Allocate call is idempotent
Allocate(s state.State, pod *v1.Pod, container *v1.Container) error
// RemoveContainer call is idempotent
RemoveContainer(s state.State, podUID string, containerName string) error
RemoveContainer(s state.State, podUID string, containerName string)
// GetTopologyHints implements the topologymanager.HintProvider Interface
// and is consulted to achieve NUMA aware resource alignment among this
// and other resource controllers.


@@ -49,8 +49,7 @@ func (p *none) Allocate(s state.State, pod *v1.Pod, container *v1.Container) err
}
// RemoveContainer call is idempotent
func (p *none) RemoveContainer(s state.State, podUID string, containerName string) error {
return nil
func (p *none) RemoveContainer(s state.State, podUID string, containerName string) {
}
// GetTopologyHints implements the topologymanager.HintProvider Interface


@@ -169,10 +169,10 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
s.SetMemoryBlocks(podUID, container.Name, containerBlocks)
// update init containers memory blocks to reflect the fact that we re-used init containers memory
// it possible that the size of the init container memory block will have 0 value, when all memory
// it is possible that the size of the init container memory block will have 0 value, when all memory
// allocated for it was re-used
// we only do this so that the sum(memory_for_all_containers) == total amount of allocated memory to the pod, even
// though the fine state here doesn't accurately reflect what was (in reality) allocated to each container
// though the final state here doesn't accurately reflect what was (in reality) allocated to each container
// TODO: we should refactor our state structs to reflect the amount of the re-used memory
p.updateInitContainersMemoryBlocks(s, pod, container, containerBlocks)
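
As a worked illustration with assumed numbers: if the init container was
allocated 2Gi and the app container re-uses 1Gi of it, the init container's
block shrinks to 1Gi (and would reach 0 if all 2Gi were re-used) while the app
container's block records 1Gi, so the per-pod sum stays at the 2Gi that is
actually reserved for the pod.
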
@@ -225,13 +225,13 @@ func (p *staticPolicy) getPodReusableMemory(pod *v1.Pod, numaAffinity bitmask.Bi
}
// RemoveContainer call is idempotent
func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName)
func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) {
blocks := s.GetMemoryBlocks(podUID, containerName)
if blocks == nil {
return nil
return
}
klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName)
s.Delete(podUID, containerName)
// Mutate machine memory state to update free and reserved memory
@@ -275,8 +275,6 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
}
s.SetMachineState(machineState)
return nil
}
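
The early return when no blocks are found is what keeps the call idempotent; a
test-style sketch (assuming a populated state s and static policy p, as in the
tests below):

p.RemoveContainer(s, "pod1", "container1") // frees the blocks, updates machine state
p.RemoveContainer(s, "pod1", "container1") // no-op: GetMemoryBlocks now returns nil
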
func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, reqRsrc map[v1.ResourceName]uint64) map[string][]topologymanager.TopologyHint {


@@ -2031,15 +2031,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) {
t.Fatalf("Unexpected error: %v", err)
}
err = p.RemoveContainer(s, "pod1", "container1")
if !reflect.DeepEqual(err, testCase.expectedError) {
t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError)
}
if err != nil {
return
}
p.RemoveContainer(s, "pod1", "container1")
assignments := s.GetMemoryAssignments()
if !areContainerMemoryAssignmentsEqual(t, assignments, testCase.expectedAssignments) {
t.Fatalf("Actual assignments %v are different from the expected %v", assignments, testCase.expectedAssignments)