From 960da7895cc660b3daab022aaa9719a47fe9de3b Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Wed, 24 Feb 2021 13:39:14 +0200 Subject: [PATCH] memory manager: remove init containers once app container started Remove init containers from the state file once the app container started, it will release the memory allocated for the init container and can intense the density of containers on the NUMA node in cases when the memory allocated for init containers is bigger than the memory allocated for app containers. Signed-off-by: Artyom Lukianov --- .../cm/memorymanager/memory_manager.go | 34 ++-- .../cm/memorymanager/memory_manager_test.go | 151 +----------------- pkg/kubelet/cm/memorymanager/policy.go | 2 +- pkg/kubelet/cm/memorymanager/policy_none.go | 3 +- pkg/kubelet/cm/memorymanager/policy_static.go | 12 +- .../cm/memorymanager/policy_static_test.go | 10 +- 6 files changed, 27 insertions(+), 185 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 7d853eca089..4c8a786c5e8 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -193,6 +193,19 @@ func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID defer m.Unlock() m.containerMap.Add(string(pod.UID), container.Name, containerID) + + // Since we know that each init container always runs to completion before + // the next container starts, we can safely remove references to any previously + // started init containers. This will free up the memory from these init containers + // for use in other pods. If the current container happens to be an init container, + // we skip deletion of it until the next container is added, and this is called again. + for _, initContainer := range pod.Spec.InitContainers { + if initContainer.Name == container.Name { + break + } + + m.policyRemoveContainerByRef(string(pod.UID), initContainer.Name) + } } // GetMemoryNUMANodes provides NUMA nodes that used to allocate the container memory @@ -243,11 +256,7 @@ func (m *manager) RemoveContainer(containerID string) error { return nil } - err = m.policyRemoveContainerByRef(podUID, containerName) - if err != nil { - klog.ErrorS(err, "RemoveContainer error") - return err - } + m.policyRemoveContainerByRef(podUID, containerName) return nil } @@ -308,22 +317,15 @@ func (m *manager) removeStaleState() { for containerName := range assignments[podUID] { if _, ok := activeContainers[podUID][containerName]; !ok { klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName) - err := m.policyRemoveContainerByRef(podUID, containerName) - if err != nil { - klog.ErrorS(err, "RemoveStaleState: failed to remove state", "podUID", podUID, "containerName", containerName) - } + m.policyRemoveContainerByRef(podUID, containerName) } } } } -func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) error { - err := m.policy.RemoveContainer(m.state, podUID, containerName) - if err == nil { - m.containerMap.RemoveByContainerRef(podUID, containerName) - } - - return err +func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) { + m.policy.RemoveContainer(m.state, podUID, containerName) + m.containerMap.RemoveByContainerRef(podUID, containerName) } func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (map[v1.ResourceName]resource.Quantity, error) { diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index f55b4921172..eaae7925836 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -100,8 +100,7 @@ func (p *mockPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Containe return p.err } -func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) error { - return p.err +func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) { } func (p *mockPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { @@ -1695,154 +1694,6 @@ func TestRemoveContainer(t *testing.T) { }, expectedError: nil, }, - { - description: "Should fail if policy returns an error", - removeContainerID: "fakeID1", - policyName: policyTypeMock, - machineInfo: machineInfo, - reserved: reserved, - assignments: state.ContainerMemoryAssignments{ - "fakePod1": map[string][]state.Block{ - "fakeContainer1": { - { - NUMAAffinity: []int{0}, - Type: v1.ResourceMemory, - Size: 1 * gb, - }, - { - NUMAAffinity: []int{0}, - Type: hugepages1Gi, - Size: 1 * gb, - }, - }, - "fakeContainer2": { - { - NUMAAffinity: []int{0}, - Type: v1.ResourceMemory, - Size: 1 * gb, - }, - { - NUMAAffinity: []int{0}, - Type: hugepages1Gi, - Size: 1 * gb, - }, - }, - }, - }, - expectedAssignments: state.ContainerMemoryAssignments{ - "fakePod1": map[string][]state.Block{ - "fakeContainer1": { - { - NUMAAffinity: []int{0}, - Type: v1.ResourceMemory, - Size: 1 * gb, - }, - { - NUMAAffinity: []int{0}, - Type: hugepages1Gi, - Size: 1 * gb, - }, - }, - "fakeContainer2": { - { - NUMAAffinity: []int{0}, - Type: v1.ResourceMemory, - Size: 1 * gb, - }, - { - NUMAAffinity: []int{0}, - Type: hugepages1Gi, - Size: 1 * gb, - }, - }, - }, - }, - machineState: state.NUMANodeMap{ - 0: &state.NUMANodeState{ - Cells: []int{0}, - NumberOfAssignments: 4, - MemoryMap: map[v1.ResourceName]*state.MemoryTable{ - v1.ResourceMemory: { - Allocatable: 9 * gb, - Free: 7 * gb, - Reserved: 2 * gb, - SystemReserved: 1 * gb, - TotalMemSize: 10 * gb, - }, - hugepages1Gi: { - Allocatable: 5 * gb, - Free: 3 * gb, - Reserved: 2 * gb, - SystemReserved: 0 * gb, - TotalMemSize: 5 * gb, - }, - }, - }, - 1: &state.NUMANodeState{ - Cells: []int{1}, - NumberOfAssignments: 0, - MemoryMap: map[v1.ResourceName]*state.MemoryTable{ - v1.ResourceMemory: { - Allocatable: 9 * gb, - Free: 9 * gb, - Reserved: 0 * gb, - SystemReserved: 1 * gb, - TotalMemSize: 10 * gb, - }, - hugepages1Gi: { - Allocatable: 5 * gb, - Free: 5 * gb, - Reserved: 0, - SystemReserved: 0, - TotalMemSize: 5 * gb, - }, - }, - }, - }, - expectedMachineState: state.NUMANodeMap{ - 0: &state.NUMANodeState{ - Cells: []int{0}, - NumberOfAssignments: 4, - MemoryMap: map[v1.ResourceName]*state.MemoryTable{ - v1.ResourceMemory: { - Allocatable: 9 * gb, - Free: 7 * gb, - Reserved: 2 * gb, - SystemReserved: 1 * gb, - TotalMemSize: 10 * gb, - }, - hugepages1Gi: { - Allocatable: 5 * gb, - Free: 3 * gb, - Reserved: 2 * gb, - SystemReserved: 0 * gb, - TotalMemSize: 5 * gb, - }, - }, - }, - 1: &state.NUMANodeState{ - Cells: []int{1}, - NumberOfAssignments: 0, - MemoryMap: map[v1.ResourceName]*state.MemoryTable{ - v1.ResourceMemory: { - Allocatable: 9 * gb, - Free: 9 * gb, - Reserved: 0 * gb, - SystemReserved: 1 * gb, - TotalMemSize: 10 * gb, - }, - hugepages1Gi: { - Allocatable: 5 * gb, - Free: 5 * gb, - Reserved: 0, - SystemReserved: 0, - TotalMemSize: 5 * gb, - }, - }, - }, - }, - expectedError: fmt.Errorf("fake reg error"), - }, { description: "Should do nothing if container is not in containerMap", removeContainerID: "fakeID3", diff --git a/pkg/kubelet/cm/memorymanager/policy.go b/pkg/kubelet/cm/memorymanager/policy.go index 9a8e1a9bb0b..a65a90dca98 100644 --- a/pkg/kubelet/cm/memorymanager/policy.go +++ b/pkg/kubelet/cm/memorymanager/policy.go @@ -32,7 +32,7 @@ type Policy interface { // Allocate call is idempotent Allocate(s state.State, pod *v1.Pod, container *v1.Container) error // RemoveContainer call is idempotent - RemoveContainer(s state.State, podUID string, containerName string) error + RemoveContainer(s state.State, podUID string, containerName string) // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. diff --git a/pkg/kubelet/cm/memorymanager/policy_none.go b/pkg/kubelet/cm/memorymanager/policy_none.go index e2020158621..5bb52db7ed4 100644 --- a/pkg/kubelet/cm/memorymanager/policy_none.go +++ b/pkg/kubelet/cm/memorymanager/policy_none.go @@ -49,8 +49,7 @@ func (p *none) Allocate(s state.State, pod *v1.Pod, container *v1.Container) err } // RemoveContainer call is idempotent -func (p *none) RemoveContainer(s state.State, podUID string, containerName string) error { - return nil +func (p *none) RemoveContainer(s state.State, podUID string, containerName string) { } // GetTopologyHints implements the topologymanager.HintProvider Interface diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go index 333860188c8..d2817de1cab 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static.go +++ b/pkg/kubelet/cm/memorymanager/policy_static.go @@ -169,10 +169,10 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai s.SetMemoryBlocks(podUID, container.Name, containerBlocks) // update init containers memory blocks to reflect the fact that we re-used init containers memory - // it possible that the size of the init container memory block will have 0 value, when all memory + // it is possible that the size of the init container memory block will have 0 value, when all memory // allocated for it was re-used // we only do this so that the sum(memory_for_all_containers) == total amount of allocated memory to the pod, even - // though the fine state here doesn't accurately reflect what was (in reality) allocated to each container + // though the final state here doesn't accurately reflect what was (in reality) allocated to each container // TODO: we should refactor our state structs to reflect the amount of the re-used memory p.updateInitContainersMemoryBlocks(s, pod, container, containerBlocks) @@ -225,13 +225,13 @@ func (p *staticPolicy) getPodReusableMemory(pod *v1.Pod, numaAffinity bitmask.Bi } // RemoveContainer call is idempotent -func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error { - klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName) +func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) { blocks := s.GetMemoryBlocks(podUID, containerName) if blocks == nil { - return nil + return } + klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName) s.Delete(podUID, containerName) // Mutate machine memory state to update free and reserved memory @@ -275,8 +275,6 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa } s.SetMachineState(machineState) - - return nil } func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, reqRsrc map[v1.ResourceName]uint64) map[string][]topologymanager.TopologyHint { diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go index d583437be65..f519411cab9 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static_test.go +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -2031,15 +2031,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - err = p.RemoveContainer(s, "pod1", "container1") - if !reflect.DeepEqual(err, testCase.expectedError) { - t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) - } - - if err != nil { - return - } - + p.RemoveContainer(s, "pod1", "container1") assignments := s.GetMemoryAssignments() if !areContainerMemoryAssignmentsEqual(t, assignments, testCase.expectedAssignments) { t.Fatalf("Actual assignments %v are different from the expected %v", assignments, testCase.expectedAssignments)