Merge pull request #120715 from gjkim42/do-not-reuse-memory-of-restartable-init-containers

Don't reuse memory of a restartable init container
This commit is contained in:
Kubernetes Prow Robot 2023-11-01 01:50:45 +01:00 committed by GitHub
commit 960431407c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 653 additions and 11 deletions

View File

@ -35,6 +35,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/kubelet/types"
)
// memoryManagerStateFileName is the file name where memory manager stores its state
@ -208,6 +209,13 @@ func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID
break
}
// Since a restartable init container remains running for the full
// duration of the pod's lifecycle, we should not remove it from the
// memory manager state.
if types.IsRestartableInitContainer(&initContainer) {
continue
}
m.policyRemoveContainerByRef(string(pod.UID), initContainer.Name)
}
}

View File

@ -34,6 +34,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/kubelet/types"
)
const policyTypeStatic policyType = "Static"
@ -49,7 +50,10 @@ type staticPolicy struct {
systemReserved systemReservedMemory
// topology manager reference to get container Topology affinity
affinity topologymanager.Store
// initContainersReusableMemory contains the memory allocated for init containers that can be reused
// initContainersReusableMemory contains the memory allocated for init
// containers that can be reused.
// Note that the restartable init container memory is not included here,
// because it is not reusable.
initContainersReusableMemory reusableMemory
}
@ -318,9 +322,10 @@ func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, re
}
func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
// Maximun resources requested by init containers at any given time.
reqRsrcsByInitCtrs := make(map[v1.ResourceName]uint64)
reqRsrcsByAppCtrs := make(map[v1.ResourceName]uint64)
// Total resources requested by restartable init containers.
reqRsrcsByRestartableInitCtrs := make(map[v1.ResourceName]uint64)
for _, ctr := range pod.Spec.InitContainers {
reqRsrcs, err := getRequestedResources(pod, &ctr)
@ -332,12 +337,17 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
reqRsrcsByInitCtrs[rsrcName] = uint64(0)
}
if reqRsrcs[rsrcName] > reqRsrcsByInitCtrs[rsrcName] {
reqRsrcsByInitCtrs[rsrcName] = qty
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#resources-calculation-for-scheduling-and-pod-admission
// for the detail.
if types.IsRestartableInitContainer(&ctr) {
reqRsrcsByRestartableInitCtrs[rsrcName] += qty
} else if reqRsrcsByRestartableInitCtrs[rsrcName]+qty > reqRsrcsByInitCtrs[rsrcName] {
reqRsrcsByInitCtrs[rsrcName] = reqRsrcsByRestartableInitCtrs[rsrcName] + qty
}
}
}
reqRsrcsByAppCtrs := make(map[v1.ResourceName]uint64)
for _, ctr := range pod.Spec.Containers {
reqRsrcs, err := getRequestedResources(pod, &ctr)
@ -353,12 +363,17 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
}
}
reqRsrcs := make(map[v1.ResourceName]uint64)
for rsrcName := range reqRsrcsByAppCtrs {
if reqRsrcsByInitCtrs[rsrcName] > reqRsrcsByAppCtrs[rsrcName] {
reqRsrcsByAppCtrs[rsrcName] = reqRsrcsByInitCtrs[rsrcName]
// Total resources requested by long-running containers.
reqRsrcsByLongRunningCtrs := reqRsrcsByAppCtrs[rsrcName] + reqRsrcsByRestartableInitCtrs[rsrcName]
reqRsrcs[rsrcName] = reqRsrcsByLongRunningCtrs
if reqRsrcs[rsrcName] < reqRsrcsByInitCtrs[rsrcName] {
reqRsrcs[rsrcName] = reqRsrcsByInitCtrs[rsrcName]
}
}
return reqRsrcsByAppCtrs, nil
return reqRsrcs, nil
}
func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint {
@ -856,7 +871,7 @@ func (p *staticPolicy) updatePodReusableMemory(pod *v1.Pod, container *v1.Contai
}
}
if isInitContainer(pod, container) {
if isRegularInitContainer(pod, container) {
if _, ok := p.initContainersReusableMemory[podUID]; !ok {
p.initContainersReusableMemory[podUID] = map[string]map[v1.ResourceName]uint64{}
}
@ -905,6 +920,12 @@ func (p *staticPolicy) updateInitContainersMemoryBlocks(s state.State, pod *v1.P
break
}
if types.IsRestartableInitContainer(&initContainer) {
// we should not reuse the resource from any restartable init
// container
continue
}
initContainerBlocks := s.GetMemoryBlocks(podUID, initContainer.Name)
if len(initContainerBlocks) == 0 {
continue
@ -938,10 +959,10 @@ func (p *staticPolicy) updateInitContainersMemoryBlocks(s state.State, pod *v1.P
}
}
func isInitContainer(pod *v1.Pod, container *v1.Container) bool {
func isRegularInitContainer(pod *v1.Pod, container *v1.Container) bool {
for _, initContainer := range pod.Spec.InitContainers {
if initContainer.Name == container.Name {
return true
return !types.IsRestartableInitContainer(&initContainer)
}
}

View File

@ -24,6 +24,7 @@ import (
"k8s.io/klog/v2"
cadvisorapi "github.com/google/cadvisor/info/v1"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
@ -40,6 +41,8 @@ const (
)
var (
containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
requirementsGuaranteed = &v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
@ -2522,6 +2525,343 @@ func TestStaticPolicyAllocateWithInitContainers(t *testing.T) {
}
}
func TestStaticPolicyAllocateWithRestartableInitContainers(t *testing.T) {
testCases := []testStaticPolicy{
{
description: "should do nothing once containers already exist under the state file",
assignments: state.ContainerMemoryAssignments{
"pod1": map[string][]state.Block{
"initContainer1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: gb,
},
},
"container1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: gb,
},
},
},
},
expectedAssignments: state.ContainerMemoryAssignments{
"pod1": map[string][]state.Block{
"initContainer1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: gb,
},
},
"container1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: gb,
},
},
},
},
machineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 2560 * mb,
Free: 512 * mb,
Reserved: 2048 * mb,
SystemReserved: 512 * mb,
TotalMemSize: 3 * gb,
},
hugepages1Gi: {
Allocatable: 2 * gb,
Free: 2 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 2 * gb,
},
},
Cells: []int{},
},
},
expectedMachineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 2560 * mb,
Free: 512 * mb,
Reserved: 2048 * mb,
SystemReserved: 512 * mb,
TotalMemSize: 3 * gb,
},
hugepages1Gi: {
Allocatable: 2 * gb,
Free: 2 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 2 * gb,
},
},
Cells: []int{},
},
},
systemReserved: systemReservedMemory{
0: map[v1.ResourceName]uint64{
v1.ResourceMemory: 512 * mb,
},
},
pod: getPodWithInitContainers(
"pod1",
[]v1.Container{
{
Name: "container1",
Resources: *requirementsGuaranteed,
},
},
[]v1.Container{
{
Name: "initContainer1",
Resources: *requirementsGuaranteed,
RestartPolicy: &containerRestartPolicyAlways,
},
},
),
expectedTopologyHints: nil,
topologyHint: &topologymanager.TopologyHint{},
},
{
description: "should not re-use restartable init containers memory",
assignments: state.ContainerMemoryAssignments{},
expectedAssignments: state.ContainerMemoryAssignments{
"pod1": map[string][]state.Block{
"initContainer1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 0,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 0,
},
},
"restartableInitContainer2": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 2 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 2 * gb,
},
},
"initContainer3": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 0,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 0,
},
},
"restartableInitContainer4": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 4 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 4 * gb,
},
},
"container1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 1 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 1 * gb,
},
},
},
},
machineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 7 * gb,
Free: 7 * gb,
Reserved: 0,
SystemReserved: 512 * mb,
TotalMemSize: 7680 * mb,
},
hugepages1Gi: {
Allocatable: 7 * gb,
Free: 7 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 7 * gb,
},
},
Cells: []int{0},
},
},
expectedMachineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 7 * gb,
Free: 0,
Reserved: 7 * gb,
SystemReserved: 512 * mb,
TotalMemSize: 7680 * mb,
},
hugepages1Gi: {
Allocatable: 7 * gb,
Free: 0,
Reserved: 7 * gb,
SystemReserved: 0,
TotalMemSize: 7 * gb,
},
},
Cells: []int{0},
NumberOfAssignments: 10,
},
},
systemReserved: systemReservedMemory{
0: map[v1.ResourceName]uint64{
v1.ResourceMemory: 512 * mb,
},
},
pod: getPodWithInitContainers(
"pod1",
[]v1.Container{
{
Name: "container1",
Resources: *requirementsGuaranteed,
},
},
[]v1.Container{
{
Name: "initContainer1",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "restartableInitContainer2",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
RestartPolicy: &containerRestartPolicyAlways,
},
{
Name: "initContainer3",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("3Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("3Gi"),
},
},
},
{
Name: "restartableInitContainer4",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
},
RestartPolicy: &containerRestartPolicyAlways,
},
},
),
topologyHint: &topologymanager.TopologyHint{},
},
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
klog.InfoS("TestStaticPolicyAllocateWithRestartableInitContainers", "name", testCase.description)
p, s, err := initTests(t, &testCase, testCase.topologyHint, testCase.initContainersReusableMemory)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
for i := range testCase.pod.Spec.InitContainers {
err = p.Allocate(s, testCase.pod, &testCase.pod.Spec.InitContainers[i])
if !reflect.DeepEqual(err, testCase.expectedError) {
t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError)
}
}
if err != nil {
return
}
for i := range testCase.pod.Spec.Containers {
err = p.Allocate(s, testCase.pod, &testCase.pod.Spec.Containers[i])
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}
assignments := s.GetMemoryAssignments()
if !areContainerMemoryAssignmentsEqual(t, assignments, testCase.expectedAssignments) {
t.Fatalf("Actual assignments %v are different from the expected %v", assignments, testCase.expectedAssignments)
}
machineState := s.GetMachineState()
if !areMachineStatesEqual(machineState, testCase.expectedMachineState) {
t.Fatalf("The actual machine state %v is different from the expected %v", machineState, testCase.expectedMachineState)
}
})
}
}
func TestStaticPolicyRemoveContainer(t *testing.T) {
testCases := []testStaticPolicy{
{
@ -3163,3 +3503,276 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) {
})
}
}
func Test_getPodRequestedResources(t *testing.T) {
testCases := []struct {
description string
pod *v1.Pod
expected map[v1.ResourceName]uint64
}{
{
description: "maximum resources of init containers > total resources of containers",
pod: getPodWithInitContainers(
"",
[]v1.Container{
{
Name: "container1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "container2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
},
},
[]v1.Container{
{
Name: "initContainer1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "initContainer2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
},
},
},
),
expected: map[v1.ResourceName]uint64{
v1.ResourceMemory: 4 * gb,
hugepages1Gi: 4 * gb,
},
},
{
description: "maximum resources of init containers < total resources of containers",
pod: getPodWithInitContainers(
"",
[]v1.Container{
{
Name: "container1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
},
{
Name: "container2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("3Gi"),
},
},
},
},
[]v1.Container{
{
Name: "initContainer1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "initContainer2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("3Gi"),
},
},
},
},
),
expected: map[v1.ResourceName]uint64{
v1.ResourceMemory: 5 * gb,
hugepages1Gi: 5 * gb,
},
},
{
description: "calculate different resources independently",
pod: getPodWithInitContainers(
"",
[]v1.Container{
{
Name: "container1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "container2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
},
},
[]v1.Container{
{
Name: "initContainer1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "initContainer2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
},
},
},
),
expected: map[v1.ResourceName]uint64{
v1.ResourceMemory: 5 * gb,
hugepages1Gi: 4 * gb,
},
},
{
description: "maximum resources of init containers > total resources of long running containers, including restartable init containers",
pod: getPodWithInitContainers(
"",
[]v1.Container{
{
Name: "container1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
hugepages1Gi: resource.MustParse("1Gi"),
},
},
},
{
Name: "container2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
},
},
[]v1.Container{
{
Name: "restartableInit1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
RestartPolicy: &containerRestartPolicyAlways,
},
{
Name: "initContainer2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
},
},
},
),
expected: map[v1.ResourceName]uint64{
v1.ResourceMemory: 6 * gb,
hugepages1Gi: 6 * gb,
},
},
{
description: "maximum resources of init containers < total resources of long running containers, including restartable init containers",
pod: getPodWithInitContainers(
"",
[]v1.Container{
{
Name: "container1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
},
{
Name: "container2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("3Gi"),
hugepages1Gi: resource.MustParse("3Gi"),
},
},
},
},
[]v1.Container{
{
Name: "restartableInit1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
RestartPolicy: &containerRestartPolicyAlways,
},
{
Name: "initContainer2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
},
},
},
),
expected: map[v1.ResourceName]uint64{
v1.ResourceMemory: 7 * gb,
hugepages1Gi: 7 * gb,
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
actual, err := getPodRequestedResources(tc.pod)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if diff := cmp.Diff(actual, tc.expected); diff != "" {
t.Errorf("getPodRequestedResources() mismatch (-want +got):\n%s", diff)
}
})
}
}