Merge pull request #128680 from tallclair/min-cpu

[FG:InPlacePodVerticalScaling] Handle edge cases around CPU MinShares
Authored by Kubernetes Prow Robot on 2024-11-08 05:24:51 +00:00; committed by GitHub.
commit 0fff5bbe7d
7 changed files with 268 additions and 28 deletions
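
For context on the edge case: on Linux with cgroup v1, the kubelet converts a container's CPU request into cpu.shares, and the kernel enforces a floor of 2 shares (cm.MinShares). Every CPU request at or below 2m therefore lands on the same effective cgroup value, so a resize between such values changes nothing at the runtime level. A minimal sketch of that conversion, assuming the constants used by the kubelet's cm package (MinShares = 2, SharesPerCPU = 1024):

```go
package main

import "fmt"

// Assumed constants mirroring pkg/kubelet/cm: the kernel rejects
// cpu.shares below 2, and one full CPU maps to 1024 shares.
const (
	minShares     = 2
	sharesPerCPU  = 1024
	milliCPUToCPU = 1000
)

// milliCPUToShares sketches how a millicore CPU request becomes a
// cgroup v1 cpu.shares value, clamped to the kernel minimum.
func milliCPUToShares(milliCPU int64) int64 {
	if milliCPU == 0 {
		return minShares // no request still yields the kernel floor
	}
	shares := milliCPU * sharesPerCPU / milliCPUToCPU
	if shares < minShares {
		shares = minShares
	}
	return shares
}

func main() {
	for _, m := range []int64{0, 1, 2, 3, 1000} {
		fmt.Printf("%4dm -> %d shares\n", m, milliCPUToShares(m))
	}
	// 0m, 1m, and 2m all print 2 shares: requests at or below MinShares
	// are indistinguishable once actuated, which is why the hunks below
	// treat them as equal.
}
```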


@@ -2912,6 +2912,16 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
}
}
allocatedPod = pod
// Special case when the updated allocation matches the actual resources. This can occur
// when reverting a resize that hasn't been actuated, or when making an equivalent change
// (such as CPU requests below MinShares). This is an optimization to clear the resize
// status immediately, rather than waiting for the next SyncPod iteration.
if allocatedResourcesMatchStatus(allocatedPod, podStatus) {
// In this case, consider the resize complete.
kl.statusManager.SetPodResizeStatus(pod.UID, "")
return allocatedPod, nil
}
}
if resizeStatus != "" {
kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus)
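
Reading the new fast path in isolation: once the allocation is accepted, the kubelet checks whether the runtime already reports matching resources; if so, there is nothing to actuate and the resize status is cleared on the spot instead of waiting for the next SyncPod iteration. A condensed, hypothetical sketch of that control flow (the types and helper below are stand-ins, not the kubelet's actual API):

```go
package main

import "fmt"

// Stand-in type; the real code uses the status manager's
// PodResizeStatus values on v1.Pod / kubecontainer.PodStatus.
type resizeStatus string

const (
	resizeCleared    resizeStatus = ""
	resizeInProgress resizeStatus = "InProgress"
)

// resolveResize sketches the fast path: an allocation that already
// matches the actual resources (e.g. a revert before actuation, or a
// 1m -> 2m CPU change) is treated as complete immediately.
func resolveResize(allocationMatchesActual bool) resizeStatus {
	if allocationMatchesActual {
		return resizeCleared // nothing to actuate
	}
	return resizeInProgress // SyncPod will drive the real resize
}

func main() {
	fmt.Println(resolveResize(true))  // "" (resize considered complete)
	fmt.Println(resolveResize(false)) // "InProgress"
}
```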


@@ -1791,7 +1791,9 @@ func allocatedResourcesMatchStatus(allocatedPod *v1.Pod, podStatus *kubecontaine
// Only compare resizeable resources, and only compare resources that are explicitly configured.
if hasCPUReq {
-if !cpuReq.Equal(*cs.Resources.CPURequest) {
+// If both allocated & status CPU requests are at or below MinShares then they are considered equal.
+if !cpuReq.Equal(*cs.Resources.CPURequest) &&
+(cpuReq.MilliValue() > cm.MinShares || cs.Resources.CPURequest.MilliValue() > cm.MinShares) {
return false
}
}
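
The comparison rule added above can be stated as a small predicate: two CPU requests match if they are exactly equal, or if both sit at or below the MinShares floor, where the difference cannot be actuated anyway. A self-contained sketch (minShares = 2 assumed, mirroring cm.MinShares):

```go
package main

import "fmt"

const minShares = 2 // assumed, mirrors cm.MinShares

// cpuRequestsMatch reports whether an allocated and an actual CPU
// request (both in millicores) should be treated as equal for resize
// purposes.
func cpuRequestsMatch(allocatedMilli, actualMilli int64) bool {
	if allocatedMilli == actualMilli {
		return true // exact match
	}
	// Below the floor, both values actuate to the same cpu.shares.
	return allocatedMilli <= minShares && actualMilli <= minShares
}

func main() {
	fmt.Println(cpuRequestsMatch(1, 2))     // true: both at/below 2m
	fmt.Println(cpuRequestsMatch(2, 100))   // false: a real difference
	fmt.Println(cpuRequestsMatch(500, 500)) // true: exact match
}
```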
@@ -2150,7 +2152,12 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
}
if resources.Requests != nil {
if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil {
-resources.Requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
+// If both the allocated & actual resources are at or below MinShares, preserve the
+// allocated value in the API to avoid confusion and simplify comparisons.
+if cStatus.Resources.CPURequest.MilliValue() > cm.MinShares ||
+resources.Requests.Cpu().MilliValue() > cm.MinShares {
+resources.Requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
+}
} else {
preserveOldResourcesValue(v1.ResourceCPU, oldStatus.Resources.Requests, resources.Requests)
}
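
Status reporting applies the same rule in the other direction: when both values are at or below MinShares, the kubelet keeps the allocated request in the API status rather than echoing the runtime's floored value, so a user who asked for 1m sees 1m instead of 2m. A sketch of the selection (minShares = 2 assumed, mirroring cm.MinShares):

```go
package main

import "fmt"

const minShares = 2 // assumed, mirrors cm.MinShares

// reportedCPURequestMilli sketches which CPU request value surfaces in
// the API container status: the runtime's actual value wins only when
// the two can genuinely differ above the cpu.shares floor.
func reportedCPURequestMilli(allocatedMilli, actualMilli int64) int64 {
	if actualMilli > minShares || allocatedMilli > minShares {
		return actualMilli // report what the runtime actually applied
	}
	return allocatedMilli // both at/below the floor: keep the allocation
}

func main() {
	fmt.Println(reportedCPURequestMilli(1, 2))     // 1: user's request preserved
	fmt.Println(reportedCPURequestMilli(500, 250)) // 250: actual value reported
}
```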


@@ -4723,7 +4723,8 @@ func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
for tdesc, tc := range map[string]struct {
State kubecontainer.State // Defaults to Running
Resources v1.ResourceRequirements
AllocatedResources *v1.ResourceRequirements // Defaults to Resources
ActualResources *kubecontainer.ContainerResources // Defaults to Resources equivalent
OldStatus v1.ContainerStatus
Expected v1.ContainerStatus
}{
@@ -4765,6 +4766,70 @@ func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
"BurstableQoSPod without CPU": {
Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
}},
ActualResources: &kubecontainer.ContainerResources{
CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
},
OldStatus: v1.ContainerStatus{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
}},
},
Expected: v1.ContainerStatus{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
AllocatedResources: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
},
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
}},
},
},
"BurstableQoSPod with below min CPU": {
Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
v1.ResourceCPU: resource.MustParse("1m"),
}},
ActualResources: &kubecontainer.ContainerResources{
CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
},
OldStatus: v1.ContainerStatus{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
v1.ResourceCPU: resource.MustParse("1m"),
}},
},
Expected: v1.ContainerStatus{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
AllocatedResources: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
v1.ResourceCPU: resource.MustParse("1m"),
},
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
v1.ResourceCPU: resource.MustParse("1m"),
}},
},
},
"GuaranteedQoSPod with CPU and memory CRI status, with ephemeral storage": {
Resources: v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
OldStatus: v1.ContainerStatus{
@@ -5003,10 +5068,13 @@ func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
tPod.Spec.Containers[0].Resources = tc.Resources
}
kubelet.statusManager.SetPodAllocation(tPod)
-resources := &kubecontainer.ContainerResources{
-MemoryLimit: tc.Resources.Limits.Memory(),
-CPULimit: tc.Resources.Limits.Cpu(),
-CPURequest: tc.Resources.Requests.Cpu(),
+resources := tc.ActualResources
+if resources == nil {
+resources = &kubecontainer.ContainerResources{
+MemoryLimit: tc.Resources.Limits.Memory(),
+CPULimit: tc.Resources.Limits.Cpu(),
+CPURequest: tc.Resources.Requests.Cpu(),
+}
}
state := kubecontainer.ContainerStateRunning
if tc.State != "" {
@@ -6698,6 +6766,30 @@ func TestAllocatedResourcesMatchStatus(t *testing.T) {
CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
},
expectMatch: true,
}, {
name: "burstable: min cpu request",
allocatedResources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
v1.ResourceCPU: resource.MustParse("2m"),
},
},
statusResources: &kubecontainer.ContainerResources{
CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
},
expectMatch: true,
}, {
name: "burstable: below min cpu request",
allocatedResources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100M"),
v1.ResourceCPU: resource.MustParse("1m"),
},
},
statusResources: &kubecontainer.ContainerResources{
CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
},
expectMatch: true,
}, {
name: "best effort",
allocatedResources: v1.ResourceRequirements{},


@@ -2588,6 +2588,8 @@ func TestHandlePodResourcesResize(t *testing.T) {
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
cpu1m := resource.MustParse("1m")
cpu2m := resource.MustParse("2m")
cpu500m := resource.MustParse("500m")
cpu1000m := resource.MustParse("1")
cpu1500m := resource.MustParse("1500m")
@@ -2671,7 +2673,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
tests := []struct {
name string
-pod *v1.Pod
+originalRequests v1.ResourceList
newRequests v1.ResourceList
newRequestsAllocated bool // Whether the new requests have already been allocated (but not actuated)
expectedAllocations v1.ResourceList
@@ -2681,7 +2683,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
}{
{
name: "Request CPU and memory decrease - expect InProgress",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
@@ -2689,7 +2691,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
},
{
name: "Request CPU increase, memory decrease - expect InProgress",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
@@ -2697,7 +2699,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
},
{
name: "Request CPU decrease, memory increase - expect InProgress",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedResize: v1.PodResizeStatusInProgress,
@@ -2705,35 +2707,35 @@ func TestHandlePodResourcesResize(t *testing.T) {
},
{
name: "Request CPU and memory increase beyond current capacity - expect Deferred",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu2500m, v1.ResourceMemory: mem2500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusDeferred,
},
{
name: "Request CPU decrease and memory increase beyond current capacity - expect Deferred",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem2500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusDeferred,
},
{
name: "Request memory increase beyond node capacity - expect Infeasible",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem4500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
},
{
name: "Request CPU increase beyond node capacity - expect Infeasible",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu5000m, v1.ResourceMemory: mem1000M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
},
{
name: "CPU increase in progress - expect InProgress",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M},
newRequestsAllocated: true,
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M},
@@ -2741,19 +2743,53 @@ func TestHandlePodResourcesResize(t *testing.T) {
},
{
name: "No resize",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: "",
},
{
name: "windows node, expect Infeasible",
-pod: testPod2,
+originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
goos: "windows",
},
{
name: "Increase CPU from min shares",
originalRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m},
expectedResize: v1.PodResizeStatusInProgress,
expectBackoffReset: true,
},
{
name: "Decrease CPU to min shares",
originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu2m},
expectedResize: v1.PodResizeStatusInProgress,
expectBackoffReset: true,
},
{
name: "Equivalent min CPU shares",
originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1m},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu2m},
expectedResize: "",
// Even though the resize isn't being actuated, we still clear the container backoff
// since the allocation is changing.
expectBackoffReset: true,
},
{
name: "Equivalent min CPU shares - already allocated",
originalRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1m},
newRequestsAllocated: true,
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1m},
expectedResize: "",
},
}
for _, tt := range tests {
@@ -2765,22 +2801,26 @@ func TestHandlePodResourcesResize(t *testing.T) {
}
kubelet.statusManager = status.NewFakeManager()
-newPod := tt.pod.DeepCopy()
+originalPod := testPod1.DeepCopy()
+originalPod.Spec.Containers[0].Resources.Requests = tt.originalRequests
+kubelet.podManager.UpdatePod(originalPod)
+newPod := originalPod.DeepCopy()
newPod.Spec.Containers[0].Resources.Requests = tt.newRequests
if !tt.newRequestsAllocated {
-require.NoError(t, kubelet.statusManager.SetPodAllocation(tt.pod))
+require.NoError(t, kubelet.statusManager.SetPodAllocation(originalPod))
} else {
require.NoError(t, kubelet.statusManager.SetPodAllocation(newPod))
}
podStatus := &kubecontainer.PodStatus{
-ID: tt.pod.UID,
-Name: tt.pod.Name,
-Namespace: tt.pod.Namespace,
-ContainerStatuses: make([]*kubecontainer.Status, len(tt.pod.Spec.Containers)),
+ID: originalPod.UID,
+Name: originalPod.Name,
+Namespace: originalPod.Namespace,
+ContainerStatuses: make([]*kubecontainer.Status, len(originalPod.Spec.Containers)),
}
-for i, c := range tt.pod.Spec.Containers {
+for i, c := range originalPod.Spec.Containers {
podStatus.ContainerStatuses[i] = &kubecontainer.Status{
Name: c.Name,
State: kubecontainer.ContainerStateRunning,
@@ -2794,7 +2834,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
now := kubelet.clock.Now()
// Put the container in backoff so we can confirm backoff is reset.
-backoffKey := kuberuntime.GetStableKey(tt.pod, &tt.pod.Spec.Containers[0])
+backoffKey := kuberuntime.GetStableKey(originalPod, &originalPod.Spec.Containers[0])
kubelet.backOff.Next(backoffKey, now)
updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)


@@ -598,6 +598,13 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
// Note: cgroup doesn't support memory request today, so we don't compare that. If canAdmitPod called during
// handlePodResourcesResize finds 'fit', then desiredMemoryRequest == currentMemoryRequest.
// Special case for minimum CPU request
if desiredResources.cpuRequest <= cm.MinShares && currentResources.cpuRequest <= cm.MinShares {
// If both desired & current CPU requests are at or below MinShares,
// then consider these equal.
desiredResources.cpuRequest = currentResources.cpuRequest
}
if currentResources == desiredResources {
// No resize required.
return true
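
Because computePodResizeAction compares whole resource structs, the patch normalizes before comparing instead of special-casing the comparison itself: when both CPU requests are at or below MinShares, the desired value is overwritten with the current one so that plain equality succeeds. The same pattern in isolation (struct and field names below are stand-ins, not the runtime manager's actual types):

```go
package main

import "fmt"

const minShares = 2 // assumed, mirrors cm.MinShares

// containerResources is a stand-in for the runtime manager's internal
// struct of desired/current container resources.
type containerResources struct {
	cpuRequest, cpuLimit, memoryLimit int64
}

// noResizeRequired mirrors the normalize-then-compare pattern: collapse
// sub-MinShares CPU requests onto the current value, then rely on plain
// struct equality. The desired struct is a copy, so the caller's value
// is not mutated.
func noResizeRequired(desired, current containerResources) bool {
	if desired.cpuRequest <= minShares && current.cpuRequest <= minShares {
		desired.cpuRequest = current.cpuRequest // equivalent at the cgroup level
	}
	return desired == current
}

func main() {
	current := containerResources{cpuRequest: 2}
	fmt.Println(noResizeRequired(containerResources{cpuRequest: 1}, current))   // true
	fmt.Println(noResizeRequired(containerResources{cpuRequest: 100}, current)) // false
}
```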
@@ -824,8 +831,14 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Cont
case v1.ResourceMemory:
return status.Resources.MemoryLimit.Equal(*container.Resources.Limits.Memory())
case v1.ResourceCPU:
-return status.Resources.CPURequest.Equal(*container.Resources.Requests.Cpu()) &&
-status.Resources.CPULimit.Equal(*container.Resources.Limits.Cpu())
+if !status.Resources.CPULimit.Equal(*container.Resources.Limits.Cpu()) {
+return false // limits don't match
+} else if status.Resources.CPURequest.Equal(*container.Resources.Requests.Cpu()) {
+return true // requests & limits both match
+}
+// Consider requests equal if both are at or below MinShares.
+return status.Resources.CPURequest.MilliValue() <= cm.MinShares &&
+container.Resources.Requests.Cpu().MilliValue() <= cm.MinShares
default:
return true // Shouldn't happen.
}


@@ -2206,6 +2206,8 @@ func TestComputePodActionsForPodResize(t *testing.T) {
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
assert.NoError(t, err)
cpu1m := resource.MustParse("1m")
cpu2m := resource.MustParse("2m")
cpu100m := resource.MustParse("100m")
cpu200m := resource.MustParse("200m")
mem100M := resource.MustParse("100Mi")
@@ -2372,6 +2374,48 @@ func TestComputePodActionsForPodResize(t *testing.T) {
return &pa
},
},
"Nothing when spec.Resources and status.Resources are equivalent": {
setupFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) {
c := &pod.Spec.Containers[1]
c.Resources = v1.ResourceRequirements{} // best effort pod
if cStatus := status.FindContainerStatusByName(c.Name); cStatus != nil {
cStatus.Resources = &kubecontainer.ContainerResources{
CPURequest: ptr.To(cpu2m.DeepCopy()),
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToStart: []int{},
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
}
return &pa
},
},
"Update container CPU resources to equivalent value": {
setupFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) {
c := &pod.Spec.Containers[1]
c.Resources = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: cpu1m},
}
if cStatus := status.FindContainerStatusByName(c.Name); cStatus != nil {
cStatus.Resources = &kubecontainer.ContainerResources{
CPURequest: ptr.To(cpu2m.DeepCopy()),
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToStart: []int{},
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
}
return &pa
},
},
"Update container CPU and memory resources with Restart policy for CPU": {
setupFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) {
c := &pod.Spec.Containers[0]


@@ -566,6 +566,24 @@ func doPodResizeTests(f *framework.Framework) {
},
},
},
{
name: "Burstable QoS pod, one container with cpu requests - resize with equivalent request",
containers: []e2epod.ResizableContainerInfo{
{
Name: "c1",
Resources: &e2epod.ContainerResources{CPUReq: "2m"},
},
},
patchString: `{"spec":{"containers":[
{"name":"c1", "resources":{"requests":{"cpu":"1m"}}}
]}}`,
expected: []e2epod.ResizableContainerInfo{
{
Name: "c1",
Resources: &e2epod.ContainerResources{CPUReq: "1m"},
},
},
},
{
name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer)",
testRollback: true,
@@ -783,6 +801,22 @@ func doPodResizeTests(f *framework.Framework) {
},
addExtendedResource: true,
},
{
name: "BestEffort QoS pod - empty resize",
containers: []e2epod.ResizableContainerInfo{
{
Name: "c1",
Resources: &e2epod.ContainerResources{},
},
},
patchString: `{}`,
expected: []e2epod.ResizableContainerInfo{
{
Name: "c1",
Resources: &e2epod.ContainerResources{},
},
},
},
}
for idx := range tests {