Mirror of https://github.com/k3s-io/kubernetes.git
memory manager: remove init containers once app container started
Remove init containers from the state file once the app container has started. This releases the memory allocated for the init containers and can increase the density of containers on the NUMA node in cases where the memory allocated for init containers is bigger than the memory allocated for app containers.

Signed-off-by: Artyom Lukianov <alukiano@redhat.com>
parent b965502c49
commit 960da7895c
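The idea, as a standalone sketch (illustrative only; podMemoryState and addContainer are made-up names, the real change lives in the kubelet memory manager shown in the diff below): because each init container runs to completion before the next container starts, the state entry of an already-finished init container can be dropped as soon as a later container is added, and the memory it held counts as free again.

// Illustrative sketch only, with hypothetical names (podMemoryState, addContainer);
// not the kubelet code itself.
package main

import "fmt"

// podMemoryState maps container name -> bytes of memory recorded for it.
type podMemoryState map[string]uint64

// addContainer records memory for a newly started container and drops the
// entries of init containers that ran before it: they have already completed,
// so their memory can be handed back. If the new container is itself an init
// container, its entry is kept until the next container is added.
func addContainer(state podMemoryState, initOrder []string, name string, bytes uint64) {
    state[name] = bytes
    for _, init := range initOrder {
        if init == name {
            break
        }
        delete(state, init)
    }
}

func main() {
    state := podMemoryState{}
    initOrder := []string{"init-restore"}

    addContainer(state, initOrder, "init-restore", 4<<30) // init container: 4Gi
    addContainer(state, initOrder, "app", 1<<30)          // app container: 1Gi

    fmt.Println(state) // map[app:1073741824] -- the init container's 4Gi is released
}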
@@ -193,6 +193,19 @@ func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID
     defer m.Unlock()

     m.containerMap.Add(string(pod.UID), container.Name, containerID)
+
+    // Since we know that each init container always runs to completion before
+    // the next container starts, we can safely remove references to any previously
+    // started init containers. This will free up the memory from these init containers
+    // for use in other pods. If the current container happens to be an init container,
+    // we skip deletion of it until the next container is added, and this is called again.
+    for _, initContainer := range pod.Spec.InitContainers {
+        if initContainer.Name == container.Name {
+            break
+        }
+
+        m.policyRemoveContainerByRef(string(pod.UID), initContainer.Name)
+    }
 }

 // GetMemoryNUMANodes provides NUMA nodes that used to allocate the container memory
@@ -243,11 +256,7 @@ func (m *manager) RemoveContainer(containerID string) error {
         return nil
     }

-    err = m.policyRemoveContainerByRef(podUID, containerName)
-    if err != nil {
-        klog.ErrorS(err, "RemoveContainer error")
-        return err
-    }
+    m.policyRemoveContainerByRef(podUID, containerName)

     return nil
 }
@@ -308,22 +317,15 @@ func (m *manager) removeStaleState() {
         for containerName := range assignments[podUID] {
             if _, ok := activeContainers[podUID][containerName]; !ok {
                 klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
-                err := m.policyRemoveContainerByRef(podUID, containerName)
-                if err != nil {
-                    klog.ErrorS(err, "RemoveStaleState: failed to remove state", "podUID", podUID, "containerName", containerName)
-                }
+                m.policyRemoveContainerByRef(podUID, containerName)
             }
         }
     }
 }

-func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) error {
-    err := m.policy.RemoveContainer(m.state, podUID, containerName)
-    if err == nil {
-        m.containerMap.RemoveByContainerRef(podUID, containerName)
-    }
-
-    return err
+func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) {
+    m.policy.RemoveContainer(m.state, podUID, containerName)
+    m.containerMap.RemoveByContainerRef(podUID, containerName)
 }

 func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (map[v1.ResourceName]resource.Quantity, error) {
@@ -100,8 +100,7 @@ func (p *mockPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Containe
     return p.err
 }

-func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
-    return p.err
+func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) {
 }

 func (p *mockPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
@@ -1695,154 +1694,6 @@ func TestRemoveContainer(t *testing.T) {
             },
             expectedError: nil,
         },
-        {
-            description:       "Should fail if policy returns an error",
-            removeContainerID: "fakeID1",
-            policyName:        policyTypeMock,
-            machineInfo:       machineInfo,
-            reserved:          reserved,
-            assignments: state.ContainerMemoryAssignments{
-                "fakePod1": map[string][]state.Block{
-                    "fakeContainer1": {
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         v1.ResourceMemory,
-                            Size:         1 * gb,
-                        },
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         hugepages1Gi,
-                            Size:         1 * gb,
-                        },
-                    },
-                    "fakeContainer2": {
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         v1.ResourceMemory,
-                            Size:         1 * gb,
-                        },
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         hugepages1Gi,
-                            Size:         1 * gb,
-                        },
-                    },
-                },
-            },
-            expectedAssignments: state.ContainerMemoryAssignments{
-                "fakePod1": map[string][]state.Block{
-                    "fakeContainer1": {
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         v1.ResourceMemory,
-                            Size:         1 * gb,
-                        },
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         hugepages1Gi,
-                            Size:         1 * gb,
-                        },
-                    },
-                    "fakeContainer2": {
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         v1.ResourceMemory,
-                            Size:         1 * gb,
-                        },
-                        {
-                            NUMAAffinity: []int{0},
-                            Type:         hugepages1Gi,
-                            Size:         1 * gb,
-                        },
-                    },
-                },
-            },
-            machineState: state.NUMANodeMap{
-                0: &state.NUMANodeState{
-                    Cells:               []int{0},
-                    NumberOfAssignments: 4,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           7 * gb,
-                            Reserved:       2 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           3 * gb,
-                            Reserved:       2 * gb,
-                            SystemReserved: 0 * gb,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-                1: &state.NUMANodeState{
-                    Cells:               []int{1},
-                    NumberOfAssignments: 0,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           9 * gb,
-                            Reserved:       0 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           5 * gb,
-                            Reserved:       0,
-                            SystemReserved: 0,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-            },
-            expectedMachineState: state.NUMANodeMap{
-                0: &state.NUMANodeState{
-                    Cells:               []int{0},
-                    NumberOfAssignments: 4,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           7 * gb,
-                            Reserved:       2 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           3 * gb,
-                            Reserved:       2 * gb,
-                            SystemReserved: 0 * gb,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-                1: &state.NUMANodeState{
-                    Cells:               []int{1},
-                    NumberOfAssignments: 0,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           9 * gb,
-                            Reserved:       0 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           5 * gb,
-                            Reserved:       0,
-                            SystemReserved: 0,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-            },
-            expectedError: fmt.Errorf("fake reg error"),
-        },
         {
             description:       "Should do nothing if container is not in containerMap",
             removeContainerID: "fakeID3",
@@ -32,7 +32,7 @@ type Policy interface {
     // Allocate call is idempotent
     Allocate(s state.State, pod *v1.Pod, container *v1.Container) error
     // RemoveContainer call is idempotent
-    RemoveContainer(s state.State, podUID string, containerName string) error
+    RemoveContainer(s state.State, podUID string, containerName string)
     // GetTopologyHints implements the topologymanager.HintProvider Interface
     // and is consulted to achieve NUMA aware resource alignment among this
     // and other resource controllers.
@@ -49,8 +49,7 @@ func (p *none) Allocate(s state.State, pod *v1.Pod, container *v1.Container) err
 }

 // RemoveContainer call is idempotent
-func (p *none) RemoveContainer(s state.State, podUID string, containerName string) error {
-    return nil
+func (p *none) RemoveContainer(s state.State, podUID string, containerName string) {
 }

 // GetTopologyHints implements the topologymanager.HintProvider Interface
@@ -169,10 +169,10 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
     s.SetMemoryBlocks(podUID, container.Name, containerBlocks)

     // update init containers memory blocks to reflect the fact that we re-used init containers memory
-    // it possible that the size of the init container memory block will have 0 value, when all memory
+    // it is possible that the size of the init container memory block will have 0 value, when all memory
     // allocated for it was re-used
     // we only do this so that the sum(memory_for_all_containers) == total amount of allocated memory to the pod, even
-    // though the fine state here doesn't accurately reflect what was (in reality) allocated to each container
+    // though the final state here doesn't accurately reflect what was (in reality) allocated to each container
     // TODO: we should refactor our state structs to reflect the amount of the re-used memory
     p.updateInitContainersMemoryBlocks(s, pod, container, containerBlocks)

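A worked example of the accounting described in the comment above (numbers are illustrative, not taken from this patch): if an init container was allocated 4Gi and an app container then requests 1Gi, that 1Gi is re-used from the init container's allocation, so the init container's recorded block shrinks to 3Gi and the per-container sum stays equal to the 4Gi allocated to the pod; when everything is re-used, the init container's block can reach 0.

// Illustrative arithmetic only, not the kubelet implementation.
package main

import "fmt"

func main() {
    const gb = uint64(1 << 30)

    initBlock := 4 * gb  // memory block recorded for the init container
    appRequest := 1 * gb // app container request, re-used from that block

    reused := appRequest
    if reused > initBlock {
        reused = initBlock
    }
    initBlock -= reused // can reach 0 when all of it was re-used

    fmt.Println(initBlock/gb, appRequest/gb)  // 3 1
    fmt.Println(initBlock+appRequest == 4*gb) // true: sum still equals the pod allocation
}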
@@ -225,13 +225,13 @@ func (p *staticPolicy) getPodReusableMemory(pod *v1.Pod, numaAffinity bitmask.Bi
 }

 // RemoveContainer call is idempotent
-func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
-    klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName)
+func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) {
     blocks := s.GetMemoryBlocks(podUID, containerName)
     if blocks == nil {
-        return nil
+        return
     }

+    klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName)
     s.Delete(podUID, containerName)

     // Mutate machine memory state to update free and reserved memory
@@ -275,8 +275,6 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
     }

     s.SetMachineState(machineState)
-
-    return nil
 }

 func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, reqRsrc map[v1.ResourceName]uint64) map[string][]topologymanager.TopologyHint {
@@ -2031,15 +2031,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) {
                 t.Fatalf("Unexpected error: %v", err)
             }

-            err = p.RemoveContainer(s, "pod1", "container1")
-            if !reflect.DeepEqual(err, testCase.expectedError) {
-                t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError)
-            }
-
-            if err != nil {
-                return
-            }
-
+            p.RemoveContainer(s, "pod1", "container1")
             assignments := s.GetMemoryAssignments()
             if !areContainerMemoryAssignmentsEqual(t, assignments, testCase.expectedAssignments) {
                 t.Fatalf("Actual assignments %v are different from the expected %v", assignments, testCase.expectedAssignments)