kubelet: Propagate error in doPodResizeAction() to the caller

Previously, doPodResizeAction() recorded errors on its `result` argument, but
because that argument was passed by value rather than as a pointer, the errors
never reached the caller. This fix makes doPodResizeAction() return the result
instead, so that errors are propagated. It also makes the usage of
PodSyncResult more consistent with other operations, such as starting and
killing a container.
Author: Hironori Shiina
Date:   2024-11-05 15:06:34 +01:00
Parent: b4d91d1b8a
Commit: 5562cb165b
5 changed files with 254 additions and 32 deletions
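The bug being fixed is easiest to see in isolation. Below is a minimal, self-contained Go sketch (illustrative names only, not kubelet code) of why mutating a result struct that was passed by value loses the error, while returning the result propagates it:

package main

import (
	"errors"
	"fmt"
)

// syncResult loosely mirrors the shape of kubecontainer.PodSyncResult:
// a struct that records a sync error via a pointer-receiver method.
type syncResult struct{ err error }

func (r *syncResult) Fail(e error) { r.err = e }

// resizeOld mimics the old doPodResizeAction signature: `result` is passed
// by value, so Fail() mutates a local copy and the caller never sees the error.
func resizeOld(result syncResult) {
	result.Fail(errors.New("resize failed"))
}

// resizeNew mimics the fixed signature: the result is a named return value,
// so the recorded error reaches the caller.
func resizeNew() (result syncResult) {
	result.Fail(errors.New("resize failed"))
	return
}

func main() {
	var r syncResult
	resizeOld(r)
	fmt.Println(r.err) // <nil>: the error was silently dropped
	r = resizeNew()
	fmt.Println(r.err) // resize failed
}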


@@ -45,6 +45,12 @@ var (
 	ErrConfigPodSandbox = errors.New("ConfigPodSandboxError")
 	// ErrKillPodSandbox returned when runtime failed to stop pod's sandbox.
 	ErrKillPodSandbox = errors.New("KillPodSandboxError")
+	// ErrUpdatePodSandbox returned when runtime failed to update the pod's sandbox config.
+	ErrUpdatePodSandbox = errors.New("UpdatePodSandboxError")
+	// ErrUpdateContainerMemory returned when runtime failed to update the pod's container config.
+	ErrUpdateContainerMemory = errors.New("UpdateContainerMemoryError")
+	// ErrUpdateContainerCPU returned when runtime failed to update the pod's container config.
+	ErrUpdateContainerCPU = errors.New("UpdateContainerCPUError")
 )

 // SyncAction indicates different kind of actions in SyncPod() and KillPod(). Now there are only actions
@@ -68,6 +74,12 @@ const (
 	ConfigPodSandbox SyncAction = "ConfigPodSandbox"
 	// KillPodSandbox action
 	KillPodSandbox SyncAction = "KillPodSandbox"
+	// UpdatePodSandbox action
+	UpdatePodSandbox SyncAction = "UpdatePodSandbox"
+	// UpdateContainerMemory action
+	UpdateContainerMemory SyncAction = "UpdateContainerMemory"
+	// UpdateContainerCPU action
+	UpdateContainerCPU SyncAction = "UpdateContainerCPU"
 )

 // SyncResult is the result of sync action.
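For context on how these new actions and sentinel errors are consumed: a SyncResult pairs an action with a target and, on failure, a sentinel error plus a detail message (the shape is visible in the test expectations later in this diff). A rough, simplified sketch of that pattern, not the actual kubelet types:

package sketch

// SyncAction names one step of SyncPod()/KillPod(), e.g. UpdateContainerCPU.
type SyncAction string

// SyncResult records the outcome of one sync action against one target
// (a container name or a pod UID).
type SyncResult struct {
	Action  SyncAction
	Target  interface{}
	Error   error  // a sentinel such as ErrUpdateContainerCPU
	Message string // the underlying error text
}

func NewSyncResult(action SyncAction, target interface{}) *SyncResult {
	return &SyncResult{Action: action, Target: target}
}

// Fail marks the action as failed with a sentinel error and a detail message,
// matching calls like updateContainerResult.Fail(ErrUpdateContainerCPU, err.Error()).
func (r *SyncResult) Fail(err error, msg string) {
	r.Error, r.Message = err, msg
}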


@@ -391,12 +391,11 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context,
 	return config, cleanupAction, nil
 }

-func (m *kubeGenericRuntimeManager) updateContainerResources(pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
+func (m *kubeGenericRuntimeManager) updateContainerResources(ctx context.Context, pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
 	containerResources := m.generateContainerResources(pod, container)
 	if containerResources == nil {
 		return fmt.Errorf("container %q updateContainerResources failed: cannot generate resources config", containerID.String())
 	}
-	ctx := context.Background()
 	err := m.runtimeService.UpdateContainerResources(ctx, containerID.ID, containerResources)
 	if err != nil {
 		klog.ErrorS(err, "UpdateContainerResources failed", "container", containerID.String())
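This hunk also replaces a locally created context.Background() with the caller's ctx. A small runnable sketch of what that buys (fakeUpdate is an illustrative stand-in for a context-aware runtime call such as UpdateContainerResources): cancellation and deadlines now propagate into the call.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// fakeUpdate stands in for a context-aware runtime call; gRPC-backed CRI
// methods honor ctx cancellation in this way.
func fakeUpdate(ctx context.Context) error {
	select {
	case <-time.After(100 * time.Millisecond):
		return nil // the "work" finished
	case <-ctx.Done():
		return ctx.Err() // the caller gave up first
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
	defer cancel()

	// Old pattern: a fresh Background() context ignores the caller's deadline.
	fmt.Println(fakeUpdate(context.Background())) // <nil>

	// New pattern: the propagated ctx bounds the call.
	fmt.Println(errors.Is(fakeUpdate(ctx), context.DeadlineExceeded)) // true
}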


@@ -963,7 +963,7 @@ func TestUpdateContainerResources(t *testing.T) {
 	assert.NoError(t, err)
 	containerID := cStatus[0].ID

-	err = m.updateContainerResources(pod, &pod.Spec.Containers[0], containerID)
+	err = m.updateContainerResources(ctx, pod, &pod.Spec.Containers[0], containerID)
 	assert.NoError(t, err)

 	// Verify container is updated


@@ -650,13 +650,14 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
 	return true
 }

-func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) {
+func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions) (result kubecontainer.PodSyncResult) {
+	updatePodResult := kubecontainer.NewSyncResult(kubecontainer.UpdatePodSandbox, pod.UID)
+	result.AddSyncResult(updatePodResult)
 	pcm := m.containerManager.NewPodContainerManager()
 	//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
 	podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
 	if podResources == nil {
-		klog.ErrorS(nil, "Unable to get resource configuration", "pod", pod.Name)
-		result.Fail(fmt.Errorf("Unable to get resource configuration processing resize for pod %s", pod.Name))
+		result.Fail(fmt.Errorf("unable to get resource configuration processing resize for pod %s", format.Pod(pod)))
 		return
 	}
 	setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
@@ -673,10 +674,7 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 		case v1.ResourceMemory:
 			err = pcm.SetPodCgroupConfig(pod, rName, podResources)
 		}
-		if err != nil {
-			klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name)
-		}
-		return err
+		return fmt.Errorf("failed to set cgroup config for %s of pod %s: %w", rName, format.Pod(pod), err)
 	}
 	// Memory and CPU are updated separately because memory resizes may be ordered differently than CPU resizes.
 	// If resize results in net pod resource increase, set pod cgroup config before resizing containers.
@@ -696,9 +694,12 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 			}
 		}
 		if len(podContainerChanges.ContainersToUpdate[rName]) > 0 {
-			if err = m.updatePodContainerResources(pod, rName, podContainerChanges.ContainersToUpdate[rName]); err != nil {
-				klog.ErrorS(err, "updatePodContainerResources failed", "pod", format.Pod(pod), "resource", rName)
-				return err
+			updateContainerResults, errUpdate := m.updatePodContainerResources(ctx, pod, rName, podContainerChanges.ContainersToUpdate[rName])
+			for _, containerResult := range updateContainerResults {
+				result.AddSyncResult(containerResult)
+			}
+			if errUpdate != nil {
+				return errUpdate
 			}
 		}
 		// At downsizing, requests should shrink prior to limits in order to keep "requests <= limits".
@@ -716,25 +717,21 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
 		if podResources.Memory == nil {
-			klog.ErrorS(nil, "podResources.Memory is nil", "pod", pod.Name)
-			result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", pod.Name))
+			result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", format.Pod(pod)))
 			return
 		}
 		currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
 		if err != nil {
-			klog.ErrorS(err, "GetPodCgroupConfig for memory failed", "pod", pod.Name)
-			result.Fail(err)
+			result.Fail(fmt.Errorf("GetPodCgroupConfig for memory failed for pod %s: %w", format.Pod(pod), err))
 			return
 		}
 		currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
 		if err != nil {
-			klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
-			result.Fail(err)
+			result.Fail(fmt.Errorf("GetPodCgroupMemoryUsage failed for pod %s", format.Pod(pod)))
 			return
 		}
 		if currentPodMemoryUsage >= uint64(*podResources.Memory) {
-			klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
-			result.Fail(fmt.Errorf("Aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
+			result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", format.Pod(pod)))
 			return
 		}
 		if errResize := resizeContainers(v1.ResourceMemory, int64(*currentPodMemoryConfig.Memory), *podResources.Memory, 0, 0); errResize != nil {
@@ -744,14 +741,12 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
 		if podResources.CPUQuota == nil || podResources.CPUShares == nil {
-			klog.ErrorS(nil, "podResources.CPUQuota or podResources.CPUShares is nil", "pod", pod.Name)
-			result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", pod.Name))
+			result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", format.Pod(pod)))
 			return
 		}
 		currentPodCpuConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
 		if err != nil {
-			klog.ErrorS(err, "GetPodCgroupConfig for CPU failed", "pod", pod.Name)
-			result.Fail(err)
+			result.Fail(fmt.Errorf("GetPodCgroupConfig for CPU failed for pod %s", format.Pod(pod)))
 			return
 		}
 		if errResize := resizeContainers(v1.ResourceCPU, *currentPodCpuConfig.CPUQuota, *podResources.CPUQuota,
@@ -760,17 +755,20 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 			return
 		}
 	}
+	return
 }

-func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
+func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Context, pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) (updateResults []*kubecontainer.SyncResult, err error) {
 	klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))

 	for _, cInfo := range containersToUpdate {
+		var updateContainerResult *kubecontainer.SyncResult
 		container := pod.Spec.Containers[cInfo.apiContainerIdx].DeepCopy()
 		// If updating memory limit, use most recently configured CPU request and limit values.
 		// If updating CPU request and limit, use most recently configured memory request and limit values.
 		switch resourceName {
 		case v1.ResourceMemory:
+			updateContainerResult = kubecontainer.NewSyncResult(kubecontainer.UpdateContainerMemory, container.Name)
 			container.Resources.Limits = v1.ResourceList{
 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuLimit, resource.DecimalSI),
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryLimit, resource.BinarySI),
@@ -780,6 +778,7 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, res
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryRequest, resource.BinarySI),
 			}
 		case v1.ResourceCPU:
+			updateContainerResult = kubecontainer.NewSyncResult(kubecontainer.UpdateContainerCPU, container.Name)
 			container.Resources.Limits = v1.ResourceList{
 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuLimit, resource.DecimalSI),
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryLimit, resource.BinarySI),
@@ -789,12 +788,19 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, res
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryRequest, resource.BinarySI),
 			}
 		}
-		if err := m.updateContainerResources(pod, container, cInfo.kubeContainerID); err != nil {
+		updateResults = append(updateResults, updateContainerResult)
+		if err = m.updateContainerResources(ctx, pod, container, cInfo.kubeContainerID); err != nil {
 			// Log error and abort as container updates need to succeed in the order determined by computePodResizeAction.
 			// The recovery path is for SyncPod to keep retrying at later times until it succeeds.
 			klog.ErrorS(err, "updateContainerResources failed", "container", container.Name, "cID", cInfo.kubeContainerID,
 				"pod", format.Pod(pod), "resourceName", resourceName)
-			return err
+			switch resourceName {
+			case v1.ResourceMemory:
+				updateContainerResult.Fail(kubecontainer.ErrUpdateContainerMemory, err.Error())
+			case v1.ResourceCPU:
+				updateContainerResult.Fail(kubecontainer.ErrUpdateContainerCPU, err.Error())
+			}
+			return
 		}
 		// If UpdateContainerResources is error-free, it means desired values for 'resourceName' was accepted by runtime.
 		// So we update currentContainerResources for 'resourceName', which is our view of most recently configured resources.
@@ -808,7 +814,7 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, res
 			cInfo.currentContainerResources.cpuRequest = cInfo.desiredContainerResources.cpuRequest
 		}
 	}
-	return nil
+	return
 }

 // computePodActions checks whether the pod spec has changed and returns the changes if true.
@@ -1308,7 +1314,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
 	if IsInPlacePodVerticalScalingAllowed(pod) {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
-			m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
+			resizeResult := m.doPodResizeAction(ctx, pod, podStatus, podContainerChanges)
+			result.AddPodSyncResult(resizeResult)
+			if resizeResult.Error() != nil {
+				klog.ErrorS(resizeResult.Error(), "doPodResizeAction failed")
+			}
 		}
 	}
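The SyncPod hunk shows the other half of the fix: the caller now merges the returned result into its own PodSyncResult and checks the aggregate error. Below is a simplified sketch of that aggregation, written as a second file in the same illustrative package as the SyncResult sketch earlier; it assumes the general shape of the real kubecontainer.PodSyncResult (whose Error() builds a richer message), not its exact code:

package sketch

import "fmt"

// PodSyncResult aggregates per-action SyncResults plus an overall SyncError.
type PodSyncResult struct {
	SyncResults []*SyncResult
	SyncError   error
}

func (p *PodSyncResult) AddSyncResult(results ...*SyncResult) {
	p.SyncResults = append(p.SyncResults, results...)
}

// AddPodSyncResult merges a child result, as SyncPod now does with the
// result returned by doPodResizeAction.
func (p *PodSyncResult) AddPodSyncResult(result PodSyncResult) {
	p.AddSyncResult(result.SyncResults...)
	if result.SyncError != nil {
		p.SyncError = result.SyncError
	}
}

func (p *PodSyncResult) Fail(err error) { p.SyncError = err }

// Error surfaces the first failure, so resizeResult.Error() is non-nil
// exactly when some sync action failed or Fail() was called.
func (p *PodSyncResult) Error() error {
	if p.SyncError != nil {
		return fmt.Errorf("failed to sync pod: %w", p.SyncError)
	}
	for _, r := range p.SyncResults {
		if r.Error != nil {
			return fmt.Errorf("failed sync action %q: %w", r.Action, r.Error)
		}
	}
	return nil
}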


@@ -22,6 +22,7 @@ import (
 	"fmt"
 	"path/filepath"
 	"reflect"
+	goruntime "runtime"
 	"sort"
 	"testing"
 	"time"
@@ -47,6 +48,7 @@ import (
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/credentialprovider"
 	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/kubernetes/pkg/kubelet/cm"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
 	imagetypes "k8s.io/kubernetes/pkg/kubelet/images"
@@ -2543,6 +2545,7 @@ func TestUpdatePodContainerResources(t *testing.T) {
 	res350m300Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem300M}
 	res300m350Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem350M}
 	res350m350Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem350M}
+	fakeError := errors.New("something is wrong")

 	pod, _ := makeBasePodAndStatus()
 	makeAndSetFakePod(t, m, fakeRuntime, pod)
@@ -2552,9 +2555,11 @@ func TestUpdatePodContainerResources(t *testing.T) {
 		apiSpecResources        []v1.ResourceRequirements
 		apiStatusResources      []v1.ResourceRequirements
 		requiresRestart         []bool
+		injectedError           error
 		invokeUpdateResources   bool
 		expectedCurrentLimits   []v1.ResourceList
 		expectedCurrentRequests []v1.ResourceList
+		expectedResults         []*kubecontainer.SyncResult
 	}{
 		"Guaranteed QoS Pod - CPU & memory resize requested, update CPU": {
 			resourceName: v1.ResourceCPU,
@@ -2572,6 +2577,20 @@ func TestUpdatePodContainerResources(t *testing.T) {
 			invokeUpdateResources:   true,
 			expectedCurrentLimits:   []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi},
 			expectedCurrentRequests: []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi},
+			expectedResults: []*kubecontainer.SyncResult{
+				{
+					Action: kubecontainer.UpdateContainerCPU,
+					Target: pod.Spec.Containers[0].Name,
+				},
+				{
+					Action: kubecontainer.UpdateContainerCPU,
+					Target: pod.Spec.Containers[1].Name,
+				},
+				{
+					Action: kubecontainer.UpdateContainerCPU,
+					Target: pod.Spec.Containers[2].Name,
+				},
+			},
 		},
 		"Guaranteed QoS Pod - CPU & memory resize requested, update memory": {
 			resourceName: v1.ResourceMemory,
@@ -2589,6 +2608,72 @@ func TestUpdatePodContainerResources(t *testing.T) {
 			invokeUpdateResources:   true,
 			expectedCurrentLimits:   []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi},
 			expectedCurrentRequests: []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi},
+			expectedResults: []*kubecontainer.SyncResult{
+				{
+					Action: kubecontainer.UpdateContainerMemory,
+					Target: pod.Spec.Containers[0].Name,
+				},
+				{
+					Action: kubecontainer.UpdateContainerMemory,
+					Target: pod.Spec.Containers[1].Name,
+				},
+				{
+					Action: kubecontainer.UpdateContainerMemory,
+					Target: pod.Spec.Containers[2].Name,
+				},
+			},
+		},
+		"Guaranteed QoS Pod - CPU & memory resize requested, update CPU, error occurs": {
+			resourceName: v1.ResourceCPU,
+			apiSpecResources: []v1.ResourceRequirements{
+				{Limits: res150m150Mi, Requests: res150m150Mi},
+				{Limits: res250m250Mi, Requests: res250m250Mi},
+				{Limits: res350m350Mi, Requests: res350m350Mi},
+			},
+			apiStatusResources: []v1.ResourceRequirements{
+				{Limits: res100m100Mi, Requests: res100m100Mi},
+				{Limits: res200m200Mi, Requests: res200m200Mi},
+				{Limits: res300m300Mi, Requests: res300m300Mi},
+			},
+			requiresRestart:         []bool{false, false, false},
+			invokeUpdateResources:   true,
+			injectedError:           fakeError,
+			expectedCurrentLimits:   []v1.ResourceList{res100m100Mi, res200m200Mi, res300m300Mi},
+			expectedCurrentRequests: []v1.ResourceList{res100m100Mi, res200m200Mi, res300m300Mi},
+			expectedResults: []*kubecontainer.SyncResult{
+				{
+					Action:  kubecontainer.UpdateContainerCPU,
+					Target:  pod.Spec.Containers[0].Name,
+					Error:   kubecontainer.ErrUpdateContainerCPU,
+					Message: fakeError.Error(),
+				},
+			},
+		},
+		"Guaranteed QoS Pod - CPU & memory resize requested, update memory, error occurs": {
+			resourceName: v1.ResourceMemory,
+			apiSpecResources: []v1.ResourceRequirements{
+				{Limits: res150m150Mi, Requests: res150m150Mi},
+				{Limits: res250m250Mi, Requests: res250m250Mi},
+				{Limits: res350m350Mi, Requests: res350m350Mi},
+			},
+			apiStatusResources: []v1.ResourceRequirements{
+				{Limits: res100m100Mi, Requests: res100m100Mi},
+				{Limits: res200m200Mi, Requests: res200m200Mi},
+				{Limits: res300m300Mi, Requests: res300m300Mi},
+			},
+			requiresRestart:         []bool{false, false, false},
+			invokeUpdateResources:   true,
+			injectedError:           fakeError,
+			expectedCurrentLimits:   []v1.ResourceList{res100m100Mi, res200m200Mi, res300m300Mi},
+			expectedCurrentRequests: []v1.ResourceList{res100m100Mi, res200m200Mi, res300m300Mi},
+			expectedResults: []*kubecontainer.SyncResult{
+				{
+					Action:  kubecontainer.UpdateContainerMemory,
+					Target:  pod.Spec.Containers[0].Name,
+					Error:   kubecontainer.ErrUpdateContainerMemory,
+					Message: fakeError.Error(),
+				},
+			},
 		},
 	} {
 		var containersToUpdate []containerToUpdateInfo
@@ -2615,8 +2700,16 @@ func TestUpdatePodContainerResources(t *testing.T) {
 			containersToUpdate = append(containersToUpdate, cInfo)
 		}
 		fakeRuntime.Called = []string{}
-		err := m.updatePodContainerResources(pod, tc.resourceName, containersToUpdate)
-		assert.NoError(t, err, dsc)
+		if tc.injectedError != nil {
+			fakeRuntime.InjectError("UpdateContainerResources", tc.injectedError)
+		}
+		updateContainerResults, err := m.updatePodContainerResources(context.TODO(), pod, tc.resourceName, containersToUpdate)
+		assert.ElementsMatch(t, tc.expectedResults, updateContainerResults)
+		if tc.injectedError == nil {
+			require.NoError(t, err, dsc)
+		} else {
+			require.EqualError(t, err, tc.injectedError.Error(), dsc)
+		}

 		if tc.invokeUpdateResources {
 			assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources", dsc)
@@ -2757,3 +2850,111 @@ func TestGetImageVolumes(t *testing.T) {
 		assert.Equal(t, tc.expectedImageVolumePulls, imageVolumePulls)
 	}
 }
+
+// This test focuses on verifying that doPodResizeAction() propagates errors correctly for a few error cases.
+// TODO: To increase test coverage, more work such as introducing a mock framework is necessary to emulate errors.
+func TestDoPodResizeAction(t *testing.T) {
+	if goruntime.GOOS != "linux" {
+		t.Skip("unsupported OS")
+	}
+
+	cpu100m := resource.MustParse("100m")
+	cpu200m := resource.MustParse("200m")
+	mem100M := resource.MustParse("100Mi")
+	mem200M := resource.MustParse("200Mi")
+
+	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)
+	_, _, m, err := createTestRuntimeManager()
+	require.NoError(t, err)
+	m.cpuCFSQuota = true // Enforce CPU Limits
+	m.containerManager = cm.NewStubContainerManager()
+
+	for _, tc := range []struct {
+		testName         string
+		currentResources v1.ResourceRequirements
+		qosClass         v1.PodQOSClass
+		desiredResources v1.ResourceRequirements
+		expectedError    string
+	}{
+		{
+			testName: "Increase cpu and memory",
+			currentResources: v1.ResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
+				Limits:   v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
+			},
+			desiredResources: v1.ResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
+				Limits:   v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
+			},
+			qosClass:      v1.PodQOSGuaranteed,
+			expectedError: "not implemented", // containerManagerStub doesn't implement GetPodCgroupConfig() or SetPodCgroupConfig().
+		},
+		{
+			testName: "memory is nil error",
+			currentResources: v1.ResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
+			},
+			desiredResources: v1.ResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem200M},
+			},
+			qosClass:      v1.PodQOSBurstable,
+			expectedError: "podResources.Memory is nil for pod",
+		},
+		{
+			testName: "cpu is nil error",
+			currentResources: v1.ResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
+			},
+			desiredResources: v1.ResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
+			},
+			qosClass:      v1.PodQOSBurstable,
+			expectedError: "podResources.CPUQuota or podResources.CPUShares is nil for pod",
+		},
+	} {
+		t.Run(tc.testName, func(t *testing.T) {
+			pod, kps := makeBasePodAndStatus()
+			// The pod spec and allocated resources are already updated as desired when doPodResizeAction() is called.
+			pod.Spec.Containers[0].Resources = tc.desiredResources
+			pod.Status.ContainerStatuses[0].AllocatedResources = tc.desiredResources.Requests
+			pod.Status.QOSClass = tc.qosClass
+			kcs := kps.FindContainerStatusByName(pod.Spec.Containers[0].Name)
+
+			cpuReqResized := !tc.currentResources.Requests.Cpu().Equal(*tc.desiredResources.Requests.Cpu())
+			cpuLimResized := !tc.currentResources.Limits.Cpu().Equal(*tc.desiredResources.Limits.Cpu())
+			memReqResized := !tc.currentResources.Requests.Memory().Equal(*tc.desiredResources.Requests.Memory())
+			memLimResized := !tc.currentResources.Limits.Memory().Equal(*tc.desiredResources.Limits.Memory())
+
+			updateInfo := containerToUpdateInfo{
+				apiContainerIdx: 0,
+				kubeContainerID: kcs.ID,
+				desiredContainerResources: containerResources{
+					cpuRequest:    tc.desiredResources.Requests.Cpu().MilliValue(),
+					cpuLimit:      tc.desiredResources.Limits.Cpu().MilliValue(),
+					memoryRequest: tc.desiredResources.Requests.Memory().Value(),
+					memoryLimit:   tc.desiredResources.Limits.Memory().Value(),
+				},
+				currentContainerResources: &containerResources{
+					cpuRequest:    tc.currentResources.Requests.Cpu().MilliValue(),
+					cpuLimit:      tc.currentResources.Limits.Cpu().MilliValue(),
+					memoryRequest: tc.currentResources.Requests.Memory().Value(),
+					memoryLimit:   tc.currentResources.Limits.Memory().Value(),
+				},
+			}
+			containersToUpdate := make(map[v1.ResourceName][]containerToUpdateInfo)
+			if cpuReqResized || cpuLimResized {
+				containersToUpdate[v1.ResourceCPU] = []containerToUpdateInfo{updateInfo}
+			}
+			if memReqResized || memLimResized {
+				containersToUpdate[v1.ResourceMemory] = []containerToUpdateInfo{updateInfo}
+			}
+
+			syncResult := m.doPodResizeAction(context.TODO(), pod, kps,
+				podActions{
+					ContainersToUpdate: containersToUpdate,
+				})
+			assert.ErrorContains(t, syncResult.Error(), tc.expectedError)
+		})
+	}
+}
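The error cases in TestUpdatePodContainerResources rely on the fake runtime's InjectError helper. Below is a generic, runnable sketch of that test pattern (recording calls and returning a pre-registered error per method name); the type and method bodies are illustrative stand-ins, not the actual containertest fake:

package main

import (
	"errors"
	"fmt"
)

// fakeRuntimeService records which methods were called and returns any
// error injected for a method name, mimicking the fake used in the test above.
type fakeRuntimeService struct {
	Called []string
	errs   map[string]error
}

func (f *fakeRuntimeService) InjectError(method string, err error) {
	if f.errs == nil {
		f.errs = map[string]error{}
	}
	f.errs[method] = err
}

func (f *fakeRuntimeService) UpdateContainerResources(id string) error {
	f.Called = append(f.Called, "UpdateContainerResources")
	return f.errs["UpdateContainerResources"]
}

func main() {
	f := &fakeRuntimeService{}
	f.InjectError("UpdateContainerResources", errors.New("something is wrong"))
	fmt.Println(f.UpdateContainerResources("c1")) // something is wrong
	fmt.Println(f.Called)                         // [UpdateContainerResources]
}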