Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-20 18:31:15 +00:00)
Merge pull request #116702 from vinaykul/restart-free-pod-vertical-scaling-podmutation-fix
Fix pod object update that may cause data race
Commit: c7cc7886e2
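The diff below removes an in-place write to a shared pod object in the kubelet's in-place resize path: handlePodResourcesResize previously did *pod = *updatedPod, mutating an object that other kubelet components may be reading concurrently. With the fix, canResizePod operates on a deep copy and handlePodResourcesResize returns that copy, so the caller in SyncPod rebinds its local variable instead. The Go program that follows is a minimal sketch of that pattern; the names Pod, racyResize, and safeResize are illustrative assumptions, not the kubelet API.

// Minimal sketch of the race being fixed, with assumed names. The racy
// variant writes through a shared pointer, so concurrent readers can observe
// a half-updated object. The safe variant mutates a copy and returns it; the
// caller rebinds its local variable, mirroring the change to SyncPod below.
package main

import "fmt"

type Pod struct {
	Name   string
	CPU    int64 // milliCPU
	Memory int64 // bytes
}

// racyResize mutates the object the caller shares with other goroutines.
func racyResize(pod *Pod, cpu, mem int64) {
	pod.CPU = cpu // data race if another goroutine reads *pod concurrently
	pod.Memory = mem
}

// safeResize leaves the shared object untouched and returns an updated copy.
func safeResize(pod *Pod, cpu, mem int64) *Pod {
	updated := *pod // copy; the real code uses pod.DeepCopy()
	updated.CPU = cpu
	updated.Memory = mem
	return &updated
}

func main() {
	pod := &Pod{Name: "demo", CPU: 1000, Memory: 512 << 20}
	pod = safeResize(pod, 2000, 1024<<20) // caller rebinds, as SyncPod now does
	fmt.Printf("%+v\n", *pod)
}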
@@ -1889,7 +1889,7 @@ func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType,
 		// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
 		// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
 		if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
-			kl.handlePodResourcesResize(pod)
+			pod = kl.handlePodResourcesResize(pod)
 		}
 	}

@@ -2631,13 +2631,14 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 		klog.ErrorS(err, "getNodeAnyway function failed")
 		return false, nil, ""
 	}
+	podCopy := pod.DeepCopy()
 	cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
 	memAvailable := node.Status.Allocatable.Memory().Value()
-	cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
-	memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
+	cpuRequests := resource.GetResourceRequest(podCopy, v1.ResourceCPU)
+	memRequests := resource.GetResourceRequest(podCopy, v1.ResourceMemory)
 	if cpuRequests > cpuAvailable || memRequests > memAvailable {
-		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", pod.Name)
-		return false, nil, v1.PodResizeStatusInfeasible
+		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", podCopy.Name)
+		return false, podCopy, v1.PodResizeStatusInfeasible
 	}

 	// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
@@ -2649,13 +2650,12 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 		}
 	}

-	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok {
+	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, podCopy); !ok {
 		// Log reason and return. Let the next sync iteration retry the resize
-		klog.V(3).InfoS("Resize cannot be accommodated", "pod", pod.Name, "reason", failReason, "message", failMessage)
-		return false, nil, v1.PodResizeStatusDeferred
+		klog.V(3).InfoS("Resize cannot be accommodated", "pod", podCopy.Name, "reason", failReason, "message", failMessage)
+		return false, podCopy, v1.PodResizeStatusDeferred
 	}

-	podCopy := pod.DeepCopy()
 	for _, container := range podCopy.Spec.Containers {
 		idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name)
 		if found {
@@ -2667,9 +2667,9 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 	return true, podCopy, v1.PodResizeStatusInProgress
 }

-func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
+func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) *v1.Pod {
 	if pod.Status.Phase != v1.PodRunning {
-		return
+		return pod
 	}
 	podResized := false
 	for _, container := range pod.Spec.Containers {
@@ -2691,31 +2691,35 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
 		}
 	}
 	if !podResized {
-		return
+		return pod
 	}

 	kl.podResizeMutex.Lock()
 	defer kl.podResizeMutex.Unlock()
 	fit, updatedPod, resizeStatus := kl.canResizePod(pod)
+	if updatedPod == nil {
+		return pod
+	}
 	if fit {
 		// Update pod resource allocation checkpoint
 		if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil {
 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
+			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(updatedPod))
+			return pod
 		}
-		*pod = *updatedPod
 	}
 	if resizeStatus != "" {
 		// Save resize decision to checkpoint
-		if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
+		if err := kl.statusManager.SetPodResizeStatus(updatedPod.UID, resizeStatus); err != nil {
 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod))
+			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(updatedPod))
+			return pod
 		}
-		pod.Status.Resize = resizeStatus
+		updatedPod.Status.Resize = resizeStatus
 	}
-	kl.podManager.UpdatePod(pod)
-	kl.statusManager.SetPodStatus(pod, pod.Status)
-	return
+	kl.podManager.UpdatePod(updatedPod)
+	kl.statusManager.SetPodStatus(updatedPod, updatedPod.Status)
+	return updatedPod
 }

 // LatestLoopEntryTime returns the last time in the sync loop monitor.
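Taken together, the hunks above change the contract of handlePodResourcesResize: it no longer writes back through the pod it was handed (the old *pod = *updatedPod), but records the allocation and resize status on the deep copy produced by canResizePod, hands that copy to the pod manager and status manager, and returns it to the caller. canResizePod in turn returns podCopy even on the Infeasible and Deferred paths so the caller always receives a non-shared object to annotate, and the new nil check returns the original pod unchanged if no copy is available. The caller in SyncPod simply rebinds its local variable to the returned pod, as shown in the first hunk.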
@@ -2585,8 +2585,10 @@ func TestHandlePodResourcesResize(t *testing.T) {
 		tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests
 		tt.pod.Status.ContainerStatuses[0].AllocatedResources = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}
 		kubelet.handlePodResourcesResize(tt.pod)
-		assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
-		assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name)
+		updatedPod, found := kubelet.podManager.GetPodByName(tt.pod.Namespace, tt.pod.Name)
+		assert.True(t, found, "expected to find pod %s", tt.pod.Name)
+		assert.Equal(t, tt.expectedAllocations, updatedPod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
+		assert.Equal(t, tt.expectedResize, updatedPod.Status.Resize, tt.name)
 		testKubelet.fakeKubeClient.ClearActions()
 	}
 }