Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-22 03:11:40 +00:00)
Merge pull request #116702 from vinaykul/restart-free-pod-vertical-scaling-podmutation-fix

Fix pod object update that may cause data race

Commit: c7cc7886e2
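The commit changes `handlePodResourcesResize` from mutating the `*v1.Pod` it is handed (the old code did `*pod = *updatedPod`) to applying the resize to a deep copy and returning that copy to the caller. Writing through the shared pointer can race with other kubelet goroutines that read the same pod object. Below is a minimal, self-contained sketch of the before/after pattern, using toy types in place of `v1.Pod`; nothing in it is kubelet API.

```go
package main

import "fmt"

// Pod is a toy stand-in for v1.Pod; illustrative only.
type Pod struct {
	Name     string
	Requests map[string]int64
}

// DeepCopy duplicates the pod and its map, so writes to the copy can
// never be observed through the original pointer.
func (p *Pod) DeepCopy() *Pod {
	reqs := make(map[string]int64, len(p.Requests))
	for k, v := range p.Requests {
		reqs[k] = v
	}
	return &Pod{Name: p.Name, Requests: reqs}
}

// racyResize is the old shape: it overwrites the shared object in place
// (*pod = *updated), which races with any concurrent reader of *pod.
func racyResize(pod *Pod) {
	updated := pod.DeepCopy()
	updated.Requests["cpu"] = 2000
	*pod = *updated // data race if *pod has concurrent readers
}

// safeResize is the new shape: all writes go to a private copy, which is
// returned; the shared original is never modified.
func safeResize(pod *Pod) *Pod {
	updated := pod.DeepCopy()
	updated.Requests["cpu"] = 2000
	return updated
}

func main() {
	pod := &Pod{Name: "p", Requests: map[string]int64{"cpu": 1000}}
	pod = safeResize(pod)            // caller rebinds, as SyncPod now does
	fmt.Println(pod.Requests["cpu"]) // 2000
}
```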
pkg/kubelet/kubelet.go

```diff
@@ -1889,7 +1889,7 @@ func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType,
 	// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
 	// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
 	if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
-		kl.handlePodResourcesResize(pod)
+		pod = kl.handlePodResourcesResize(pod)
 	}
 }
```
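With the new signature, SyncPod rebinds its local `pod` variable to the returned object, so the rest of the sync pass sees the resized copy without the callee ever writing through the shared pointer.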
```diff
@@ -2631,13 +2631,14 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 		klog.ErrorS(err, "getNodeAnyway function failed")
 		return false, nil, ""
 	}
+	podCopy := pod.DeepCopy()
 	cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
 	memAvailable := node.Status.Allocatable.Memory().Value()
-	cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
-	memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
+	cpuRequests := resource.GetResourceRequest(podCopy, v1.ResourceCPU)
+	memRequests := resource.GetResourceRequest(podCopy, v1.ResourceMemory)
 	if cpuRequests > cpuAvailable || memRequests > memAvailable {
-		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", pod.Name)
-		return false, nil, v1.PodResizeStatusInfeasible
+		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", podCopy.Name)
+		return false, podCopy, v1.PodResizeStatusInfeasible
 	}
 
 	// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
```
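`canResizePod` now takes its deep copy up front and returns `podCopy` rather than `nil` on the infeasible path (and, in the next hunk, the deferred path), so the caller always receives a pod object it owns. A self-contained sketch of that convention, with illustrative names that are not kubelet APIs:

```go
package main

import "fmt"

// pod is a toy type; illustrative only.
type pod struct {
	name     string
	cpuMilli int64
}

// deepCopy returns a private duplicate the caller is free to mutate.
func (p *pod) deepCopy() *pod {
	c := *p
	return &c
}

// canResize mirrors the new canResizePod shape: copy first, then return
// (fit, podCopy, status) with the copy on every post-copy exit path.
func canResize(p *pod, cpuAvailMilli int64) (bool, *pod, string) {
	podCopy := p.deepCopy()
	if podCopy.cpuMilli > cpuAvailMilli {
		return false, podCopy, "Infeasible" // the copy, never nil
	}
	return true, podCopy, "InProgress"
}

func main() {
	p := &pod{name: "p", cpuMilli: 4000}
	fit, podCopy, status := canResize(p, 2000)
	fmt.Println(fit, podCopy.name, status) // false p Infeasible
}
```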
```diff
@@ -2649,13 +2650,12 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 		}
 	}
 
-	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok {
+	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, podCopy); !ok {
 		// Log reason and return. Let the next sync iteration retry the resize
-		klog.V(3).InfoS("Resize cannot be accommodated", "pod", pod.Name, "reason", failReason, "message", failMessage)
-		return false, nil, v1.PodResizeStatusDeferred
+		klog.V(3).InfoS("Resize cannot be accommodated", "pod", podCopy.Name, "reason", failReason, "message", failMessage)
+		return false, podCopy, v1.PodResizeStatusDeferred
 	}
 
-	podCopy := pod.DeepCopy()
 	for _, container := range podCopy.Spec.Containers {
 		idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name)
 		if found {
```
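Note the deletion here: the `podCopy := pod.DeepCopy()` that previously happened only after the admission check is the copy hoisted to the top of the function in the earlier hunk, so `canAdmitPod` is now evaluated against `podCopy` as well.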
```diff
@@ -2667,9 +2667,9 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 	return true, podCopy, v1.PodResizeStatusInProgress
 }
 
-func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
+func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) *v1.Pod {
 	if pod.Status.Phase != v1.PodRunning {
-		return
+		return pod
 	}
 	podResized := false
 	for _, container := range pod.Spec.Containers {
```
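`handlePodResourcesResize` now returns a `*v1.Pod`; on the paths where there is nothing to do (pod not running, no container requests changed), it hands back the pod it was given, unmodified.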
```diff
@@ -2691,31 +2691,35 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
 		}
 	}
 	if !podResized {
-		return
+		return pod
 	}
 
 	kl.podResizeMutex.Lock()
 	defer kl.podResizeMutex.Unlock()
 	fit, updatedPod, resizeStatus := kl.canResizePod(pod)
+	if updatedPod == nil {
+		return pod
+	}
 	if fit {
 		// Update pod resource allocation checkpoint
 		if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil {
 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
+			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(updatedPod))
+			return pod
 		}
-		*pod = *updatedPod
 	}
 	if resizeStatus != "" {
 		// Save resize decision to checkpoint
-		if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
+		if err := kl.statusManager.SetPodResizeStatus(updatedPod.UID, resizeStatus); err != nil {
 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod))
+			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(updatedPod))
+			return pod
 		}
-		pod.Status.Resize = resizeStatus
+		updatedPod.Status.Resize = resizeStatus
 	}
-	kl.podManager.UpdatePod(pod)
-	kl.statusManager.SetPodStatus(pod, pod.Status)
-	return
+	kl.podManager.UpdatePod(updatedPod)
+	kl.statusManager.SetPodStatus(updatedPod, updatedPod.Status)
+	return updatedPod
 }
 
 // LatestLoopEntryTime returns the last time in the sync loop monitor.
```
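Two things stand out in this hunk. First, the new `if updatedPod == nil` guard covers canResizePod's early-error return (`false, nil, ""` when `getNodeAnyway` fails); the original pod is returned unchanged in that case. Second, `*pod = *updatedPod`, the in-place overwrite of the shared object that motivated this fix, is gone: the checkpoint writes, `podManager.UpdatePod`, and `statusManager.SetPodStatus` all operate on `updatedPod`, and the error branches now return the original pod instead of falling through.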
pkg/kubelet/kubelet_test.go

```diff
@@ -2585,8 +2585,10 @@ func TestHandlePodResourcesResize(t *testing.T)
 		tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests
 		tt.pod.Status.ContainerStatuses[0].AllocatedResources = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}
 		kubelet.handlePodResourcesResize(tt.pod)
-		assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
-		assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name)
+		updatedPod, found := kubelet.podManager.GetPodByName(tt.pod.Namespace, tt.pod.Name)
+		assert.True(t, found, "expected to find pod %s", tt.pod.Name)
+		assert.Equal(t, tt.expectedAllocations, updatedPod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
+		assert.Equal(t, tt.expectedResize, updatedPod.Status.Resize, tt.name)
 		testKubelet.fakeKubeClient.ClearActions()
 	}
 }
```
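Since the function no longer mutates its argument, the test stops asserting on `tt.pod` and instead fetches the stored object back from `podManager` (where `handlePodResourcesResize` published `updatedPod`) and asserts on that copy.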