Determine resize status from state in handlePodResourcesResize

Author: Tim Allclair
Date:   2024-11-04 14:49:33 -08:00
parent a28f14089c
commit 4a4748d23c
3 changed files with 66 additions and 30 deletions

pkg/kubelet/kubelet.go

@@ -1828,7 +1828,7 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType
 	// this conveniently retries any Deferred resize requests
 	// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
 	// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
-	pod, err = kl.handlePodResourcesResize(pod)
+	pod, err = kl.handlePodResourcesResize(pod, podStatus)
 	if err != nil {
 		return false, err
 	}
@@ -2793,12 +2793,20 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus) {
 	return true, v1.PodResizeStatusInProgress
 }
 
-func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) (*v1.Pod, error) {
+// handlePodResourcesResize returns the "allocated pod", which should be used for all resource
+// calculations after this function is called. It also updates the cached ResizeStatus according to
+// the allocation decision and pod status.
+func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (*v1.Pod, error) {
 	allocatedPod, updated := kl.statusManager.UpdatePodFromAllocation(pod)
 	if !updated {
-		// Unless a resize is in-progress, clear the resize status.
-		resizeStatus, _ := kl.statusManager.GetPodResizeStatus(string(pod.UID))
-		if resizeStatus != v1.PodResizeStatusInProgress {
+		resizeInProgress := !allocatedResourcesMatchStatus(allocatedPod, podStatus)
+		if resizeInProgress {
+			// If a resize is in progress, make sure the cache has the correct state in case the Kubelet restarted.
+			if err := kl.statusManager.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInProgress); err != nil {
+				klog.ErrorS(err, "Failed to set resize status to InProgress", "pod", format.Pod(pod))
+			}
+		} else {
+			// (Desired == Allocated == Actual) => clear the resize status.
 			if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil {
 				klog.ErrorS(err, "Failed to clear resize status", "pod", format.Pod(pod))
 			}
@@ -2819,10 +2827,8 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) (*v1.Pod, error) {
 		allocatedPod = pod
 	}
 	if resizeStatus != "" {
-		// Save resize decision to checkpoint
-		if err := kl.statusManager.SetPodResizeStatus(allocatedPod.UID, resizeStatus); err != nil {
-			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(allocatedPod))
+		if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
+			klog.ErrorS(err, "Failed to set resize status", "pod", format.Pod(pod), "resizeStatus", resizeStatus)
 		}
 	}
 	return allocatedPod, nil
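
Taken together, these hunks make handlePodResourcesResize a three-way reconciliation of desired (pod spec), allocated (checkpoint), and actual (runtime-reported) resources. Below is a minimal sketch of that decision table, not the Kubelet's actual code: resizeStatusFor is a hypothetical helper whose two booleans stand in for UpdatePodFromAllocation's "updated" result and for allocatedResourcesMatchStatus.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// resizeStatusFor distills the branch structure above (hypothetical helper).
// desiredMatchesAllocated mirrors !updated from UpdatePodFromAllocation;
// allocatedMatchesActual mirrors allocatedResourcesMatchStatus.
func resizeStatusFor(desiredMatchesAllocated, allocatedMatchesActual bool) v1.PodResizeStatus {
	switch {
	case !desiredMatchesAllocated:
		// A new resize is awaiting admission; the real code defers to
		// canResizePod, which picks InProgress, Deferred, or Infeasible.
		return v1.PodResizeStatusProposed // placeholder for canResizePod's decision
	case !allocatedMatchesActual:
		// Allocated but not yet actuated. Re-asserting InProgress is what
		// repairs the cached status if the Kubelet restarted mid-resize.
		return v1.PodResizeStatusInProgress
	default:
		// Desired == Allocated == Actual: nothing in flight, clear the status.
		return ""
	}
}

func main() {
	fmt.Println(resizeStatusFor(true, false)) // InProgress
	fmt.Println(resizeStatusFor(true, true))  // "" (resize complete or none requested)
}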

pkg/kubelet/kubelet_pods.go

@@ -1757,15 +1757,6 @@ func (kl *Kubelet) determinePodResizeStatus(allocatedPod *v1.Pod, podStatus *kub
 	}
 	resizeStatus, _ := kl.statusManager.GetPodResizeStatus(string(allocatedPod.UID))
-	// If the resize was in-progress and the actual resources match the allocated resources, mark
-	// the resize as complete by clearing the resize status.
-	if resizeStatus == v1.PodResizeStatusInProgress &&
-		allocatedResourcesMatchStatus(allocatedPod, podStatus) {
-		if err := kl.statusManager.SetPodResizeStatus(allocatedPod.UID, ""); err != nil {
-			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", format.Pod(allocatedPod))
-		}
-		return ""
-	}
 	return resizeStatus
 }
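
With that block gone, determinePodResizeStatus is a pure reader: it reports whatever the status manager has cached, and every write now happens on the sync path in handlePodResourcesResize. A hypothetical stand-in for the cache contract both sides rely on (the real status manager also checkpoints allocations, omitted here):

package main

import (
	"fmt"
	"sync"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
)

// resizeStatusCache is a hypothetical stand-in for the status manager's
// resize-status bookkeeping. After this commit the sync path
// (handlePodResourcesResize) is the single writer and the status-generation
// path (determinePodResizeStatus) only reads.
type resizeStatusCache struct {
	mu       sync.RWMutex
	statuses map[types.UID]v1.PodResizeStatus
}

func newResizeStatusCache() *resizeStatusCache {
	return &resizeStatusCache{statuses: map[types.UID]v1.PodResizeStatus{}}
}

// Set records the status; clearing ("") removes the entry entirely.
func (c *resizeStatusCache) Set(uid types.UID, s v1.PodResizeStatus) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if s == "" {
		delete(c.statuses, uid)
		return
	}
	c.statuses[uid] = s
}

// Get reports the cached status and whether one is present.
func (c *resizeStatusCache) Get(uid types.UID) (v1.PodResizeStatus, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	s, ok := c.statuses[uid]
	return s, ok
}

func main() {
	cache := newResizeStatusCache()
	uid := types.UID("pod-1")
	cache.Set(uid, v1.PodResizeStatusInProgress) // writer: sync loop
	if s, ok := cache.Get(uid); ok {             // reader: status generation
		fmt.Println(s)
	}
}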

pkg/kubelet/kubelet_test.go

@@ -2665,11 +2665,12 @@ func TestHandlePodResourcesResize(t *testing.T) {
 	defer kubelet.podManager.RemovePod(testPod1)
 
 	tests := []struct {
-		name                string
-		pod                 *v1.Pod
-		newRequests         v1.ResourceList
-		expectedAllocations v1.ResourceList
-		expectedResize      v1.PodResizeStatus
+		name                 string
+		pod                  *v1.Pod
+		newRequests          v1.ResourceList
+		newRequestsAllocated bool // Whether the new requests have already been allocated (but not actuated)
+		expectedAllocations  v1.ResourceList
+		expectedResize       v1.PodResizeStatus
 	}{
 		{
 			name: "Request CPU and memory decrease - expect InProgress",
@@ -2720,25 +2721,63 @@ func TestHandlePodResourcesResize(t *testing.T) {
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize:      v1.PodResizeStatusInfeasible,
 		},
+		{
+			name:                 "CPU increase in progress - expect InProgress",
+			pod:                  testPod2,
+			newRequests:          v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M},
+			newRequestsAllocated: true,
+			expectedAllocations:  v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M},
+			expectedResize:       v1.PodResizeStatusInProgress,
+		},
+		{
+			name:                "No resize",
+			pod:                 testPod2,
+			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
+			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
+			expectedResize:      "",
+		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			kubelet.statusManager = status.NewFakeManager()
-			require.NoError(t, kubelet.statusManager.SetPodAllocation(tt.pod))
-			pod := tt.pod.DeepCopy()
-			pod.Spec.Containers[0].Resources.Requests = tt.newRequests
-			updatedPod, err := kubelet.handlePodResourcesResize(pod)
+			newPod := tt.pod.DeepCopy()
+			newPod.Spec.Containers[0].Resources.Requests = tt.newRequests
+
+			if !tt.newRequestsAllocated {
+				require.NoError(t, kubelet.statusManager.SetPodAllocation(tt.pod))
+			} else {
+				require.NoError(t, kubelet.statusManager.SetPodAllocation(newPod))
+			}
+
+			podStatus := &kubecontainer.PodStatus{
+				ID:                tt.pod.UID,
+				Name:              tt.pod.Name,
+				Namespace:         tt.pod.Namespace,
+				ContainerStatuses: make([]*kubecontainer.Status, len(tt.pod.Spec.Containers)),
+			}
+			for i, c := range tt.pod.Spec.Containers {
+				podStatus.ContainerStatuses[i] = &kubecontainer.Status{
+					Name:  c.Name,
+					State: kubecontainer.ContainerStateRunning,
+					Resources: &kubecontainer.ContainerResources{
+						CPURequest:  c.Resources.Requests.Cpu(),
+						CPULimit:    c.Resources.Limits.Cpu(),
+						MemoryLimit: c.Resources.Limits.Memory(),
+					},
+				}
+			}
+
+			updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)
 			require.NoError(t, err)
 			assert.Equal(t, tt.expectedAllocations, updatedPod.Spec.Containers[0].Resources.Requests, "updated pod spec resources")
-			alloc, found := kubelet.statusManager.GetContainerResourceAllocation(string(pod.UID), pod.Spec.Containers[0].Name)
+			alloc, found := kubelet.statusManager.GetContainerResourceAllocation(string(newPod.UID), newPod.Spec.Containers[0].Name)
 			require.True(t, found, "container allocation")
 			assert.Equal(t, tt.expectedAllocations, alloc.Requests, "stored container allocation")
-			resizeStatus, found := kubelet.statusManager.GetPodResizeStatus(string(pod.UID))
-			require.True(t, found, "pod resize status")
+			resizeStatus, _ := kubelet.statusManager.GetPodResizeStatus(string(newPod.UID))
 			assert.Equal(t, tt.expectedResize, resizeStatus)
 		})
 	}
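
The fabricated podStatus in this loop is what gives newRequestsAllocated its meaning: the runtime is made to report the old resources, so when the new requests are already allocated, allocatedResourcesMatchStatus sees a mismatch and the status lands on InProgress. A hedged sketch of that per-container comparison, with the compared fields (CPU request, CPU limit, memory limit) inferred from what the test populates; the real allocatedResourcesMatchStatus may handle edge cases differently.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// actuatedResources mirrors the kubecontainer.ContainerResources fields the
// test populates; a local copy keeps this sketch self-contained.
type actuatedResources struct {
	CPURequest, CPULimit, MemoryLimit *resource.Quantity
}

// quantitiesMatch treats "not reported" (nil) as matching only when the
// allocated side is also absent — an assumption, not the Kubelet's exact rule.
func quantitiesMatch(allocated resource.Quantity, ok bool, actual *resource.Quantity) bool {
	if !ok {
		return actual == nil
	}
	return actual != nil && allocated.Cmp(*actual) == 0
}

// matchSketch approximates allocatedResourcesMatchStatus for one container:
// only CPU requests, CPU limits, and memory limits are actuated by the
// runtime, so those are the fields compared (memory requests are not).
func matchSketch(allocated v1.ResourceRequirements, actual actuatedResources) bool {
	cpuReq, okCPUReq := allocated.Requests[v1.ResourceCPU]
	cpuLim, okCPULim := allocated.Limits[v1.ResourceCPU]
	memLim, okMemLim := allocated.Limits[v1.ResourceMemory]
	return quantitiesMatch(cpuReq, okCPUReq, actual.CPURequest) &&
		quantitiesMatch(cpuLim, okCPULim, actual.CPULimit) &&
		quantitiesMatch(memLim, okMemLim, actual.MemoryLimit)
}

func main() {
	cpu1000m := resource.MustParse("1")
	cpu1500m := resource.MustParse("1500m")
	allocated := v1.ResourceRequirements{
		Requests: v1.ResourceList{v1.ResourceCPU: cpu1500m},
	}
	actual := actuatedResources{CPURequest: &cpu1000m}
	fmt.Println(matchSketch(allocated, actual)) // false => resize still InProgress
}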