From 1c7850c355e2335c72e9503d1c6f7804789cc6ba Mon Sep 17 00:00:00 2001
From: vinay kulkarni
Date: Sun, 12 Mar 2023 05:59:14 +0000
Subject: [PATCH 1/3] Fix null pointer access in doPodResizeAction for kubelet-only mode

---
 pkg/kubelet/cm/container_manager_stub.go       |  6 ++++--
 pkg/kubelet/kuberuntime/kuberuntime_manager.go | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/pkg/kubelet/cm/container_manager_stub.go b/pkg/kubelet/cm/container_manager_stub.go
index 56176e22d9c..6fa2f62555b 100644
--- a/pkg/kubelet/cm/container_manager_stub.go
+++ b/pkg/kubelet/cm/container_manager_stub.go
@@ -17,6 +17,8 @@ limitations under the License.
 package cm
 
 import (
+	"fmt"
+
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/klog/v2"
 
@@ -96,11 +98,11 @@ func (cm *containerManagerStub) GetDevicePluginResourceCapacity() (v1.ResourceLi
 }
 
 func (m *podContainerManagerStub) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
-	return nil, nil
+	return nil, fmt.Errorf("not implemented")
 }
 
 func (m *podContainerManagerStub) SetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName, _ *ResourceConfig) error {
-	return nil
+	return fmt.Errorf("not implemented")
 }
 
 func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index ca3dc026784..5337e8ad1db 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -697,6 +697,11 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 		return err
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
+		if podResources.Memory == nil {
+			klog.ErrorS(nil, "podResources.Memory is nil", "pod", pod.Name)
+			result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", pod.Name))
+			return
+		}
 		currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
 		if err != nil {
 			klog.ErrorS(err, "GetPodCgroupConfig for memory failed", "pod", pod.Name)
@@ -720,6 +725,11 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
 		}
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
+		if podResources.CPUQuota == nil || podResources.CPUShares == nil {
+			klog.ErrorS(nil, "podResources.CPUQuota or podResources.CPUShares is nil", "pod", pod.Name)
+			result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", pod.Name))
+			return
+		}
 		currentPodCpuConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
 		if err != nil {
 			klog.ErrorS(err, "GetPodCgroupConfig for CPU failed", "pod", pod.Name)
@@ -941,6 +951,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
 			message = fmt.Sprintf("Container %s failed startup probe", container.Name)
 			reason = reasonStartupProbe
 		} else if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
+			!types.IsStaticPod(pod) &&
 			!m.computePodResizeAction(pod, idx, containerStatus, &changes) {
 			// computePodResizeAction updates 'changes' if resize policy requires restarting this container
 			continue
@@ -1213,7 +1224,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	}
 
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
-	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
+	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !types.IsStaticPod(pod) {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
 			m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
 		}
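The failure mode this first patch closes is worth seeing in isolation: in kubelet-only (standalone) mode the stub PodContainerManager returned (nil, nil) from GetPodCgroupConfig, so doPodResizeAction took the success path and then dereferenced a nil ResourceConfig. A minimal, self-contained Go sketch of that pattern, using a simplified stand-in type rather than kubelet's real ones:

package main

import "fmt"

// ResourceConfig is a simplified stand-in for kubelet's cm.ResourceConfig.
type ResourceConfig struct {
	Memory *int64
}

// getPodCgroupConfig mimics the old stub: it reports success but no config.
func getPodCgroupConfig() (*ResourceConfig, error) {
	return nil, nil
}

func main() {
	cfg, err := getPodCgroupConfig()
	if err != nil {
		// With the patched stub, execution stops here instead of panicking.
		fmt.Println("skipping resize:", err)
		return
	}
	// The old (nil, nil) return reaches this dereference and panics.
	fmt.Println("current memory limit:", *cfg.Memory)
}

Returning an explicit "not implemented" error from the stub, as the patch does, routes callers through their existing error handling; the added nil guards in doPodResizeAction additionally defend against any implementation that returns a config with nil fields.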
From 5b2682ac0437a3a30d4d8447d86b537e124d1721 Mon Sep 17 00:00:00 2001
From: vinay kulkarni
Date: Tue, 14 Mar 2023 19:37:35 +0000
Subject: [PATCH 2/3] Make in-place resize exclusion conditions (such as static pods) very obvious

---
 pkg/kubelet/kuberuntime/kuberuntime_manager.go | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 5337e8ad1db..5721b9c9ede 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -524,6 +524,16 @@ func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) boo
 	return cStatus.ExitCode == 0
 }
 
+func isInPlacePodVerticalScalingAllowed(pod *v1.Pod) bool {
+	if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
+		return false
+	}
+	if types.IsStaticPod(pod) {
+		return false
+	}
+	return true
+}
+
 func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool {
 	container := pod.Spec.Containers[containerIdx]
 	if container.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 {
@@ -885,7 +895,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
 		return changes
 	}
 
-	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
+	if isInPlacePodVerticalScalingAllowed(pod) {
 		changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo)
 		latestPodStatus, err := m.GetPodStatus(ctx, podStatus.ID, pod.Name, pod.Namespace)
 		if err == nil {
@@ -950,9 +960,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
 			// If the container failed the startup probe, we should kill it.
 			message = fmt.Sprintf("Container %s failed startup probe", container.Name)
 			reason = reasonStartupProbe
-		} else if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
-			!types.IsStaticPod(pod) &&
-			!m.computePodResizeAction(pod, idx, containerStatus, &changes) {
+		} else if isInPlacePodVerticalScalingAllowed(pod) && !m.computePodResizeAction(pod, idx, containerStatus, &changes) {
 			// computePodResizeAction updates 'changes' if resize policy requires restarting this container
 			continue
 		} else {
@@ -1224,7 +1232,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	}
 
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
-	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !types.IsStaticPod(pod) {
+	if isInPlacePodVerticalScalingAllowed(pod) {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
 			m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
 		}
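With the helper in place, both exclusion conditions (feature gate off, static pod) live in one spot and every call site reads as a single predicate. A hypothetical unit test for the helper might look like the sketch below; it assumes types.IsStaticPod identifies static pods via the kubernetes.io/config.source annotation and that the feature-gate test helper from k8s.io/component-base returns a cleanup func, both as of this branch:

package kuberuntime

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/features"
)

func TestIsInPlacePodVerticalScalingAllowed(t *testing.T) {
	// Enable the gate for the duration of the test.
	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()

	regularPod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "web"}}
	staticPod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name: "etcd",
		// types.IsStaticPod keys off the pod's config source annotation.
		Annotations: map[string]string{"kubernetes.io/config.source": "file"},
	}}

	if !isInPlacePodVerticalScalingAllowed(regularPod) {
		t.Errorf("expected in-place resize to be allowed for a regular pod")
	}
	if isInPlacePodVerticalScalingAllowed(staticPod) {
		t.Errorf("expected in-place resize to be disallowed for a static pod")
	}
}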
From 86efc8bd79c75d171bc448239002f4c746addf32 Mon Sep 17 00:00:00 2001
From: vinay kulkarni
Date: Tue, 14 Mar 2023 20:30:02 +0000
Subject: [PATCH 3/3] Add isInPlacePodVerticalScalingAllowed for restart check block

---
 pkg/kubelet/kuberuntime/kuberuntime_manager.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 5721b9c9ede..43f3804a832 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -946,7 +946,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
 		restart := shouldRestartOnFailure(pod)
 		// Do not restart if only the Resources field has changed with InPlacePodVerticalScaling enabled
 		if _, _, changed := containerChanged(&container, containerStatus); changed &&
-			(!utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) ||
+			(!isInPlacePodVerticalScalingAllowed(pod) ||
 				kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
 			message = fmt.Sprintf("Container %s definition changed", container.Name)
 			// Restart regardless of the restart policy because the container
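The practical effect of this last one-line change: for a static pod with the feature gate enabled, a resources-only spec change no longer qualifies for the in-place path and falls through to an ordinary restart. A condensed, illustrative restatement of the patched decision, with booleans standing in for containerChanged, the HashContainerWithoutResources comparison, and the isInPlacePodVerticalScalingAllowed(pod) result:

package main

import "fmt"

// restartDecision condenses the patched block: specChanged stands in for
// containerChanged(), resourcesOnly for a matching resources-agnostic hash,
// and resizeAllowed for isInPlacePodVerticalScalingAllowed(pod).
func restartDecision(resizeAllowed, specChanged, resourcesOnly bool) bool {
	if !specChanged {
		return false
	}
	if resizeAllowed && resourcesOnly {
		return false // handled in place by doPodResizeAction, no restart
	}
	return true // container definition changed; restart it
}

func main() {
	// Static pod, gate on: resize is not allowed, so a resources-only
	// change now restarts the container (the behavior patch 3 fixes in).
	fmt.Println(restartDecision(false, true, true)) // true
	// Regular pod, gate on: the same change is resized in place.
	fmt.Println(restartDecision(true, true, true)) // false
}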