Fix slow reconcile when quickly reverting resize patch

Sotiris Salloumis 2024-06-27 10:57:25 +02:00
parent 9c571abeec
commit 68fcc9cf8a
5 changed files with 28 additions and 5 deletions

pkg/kubelet/kubelet.go

@@ -50,6 +50,7 @@ import (
"k8s.io/utils/ptr"
v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
@@ -2898,6 +2899,12 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
if err := kl.statusManager.SetPodAllocation(pod); err != nil {
return nil, err
}
for i, container := range pod.Spec.Containers {
if !apiequality.Semantic.DeepEqual(container.Resources, allocatedPod.Spec.Containers[i].Resources) {
key := kuberuntime.GetStableKey(pod, &container)
kl.backOff.Reset(key)
}
}
allocatedPod = pod
}
if resizeStatus != "" {
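
Note: kl.backOff here is a *flowcontrol.Backoff from k8s.io/client-go/util/flowcontrol, so Reset(key) drops the pending restart delay for that container entirely. A minimal standalone sketch of that behaviour (the key string and durations below are made up, not the kubelet's actual configuration):

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// Same type as kl.backOff: per-key exponential backoff, here 10s initial, 5m cap.
	backOff := flowcontrol.NewBackOff(10*time.Second, 5*time.Minute)
	key := "mypod_default_uid-123_app_1a2b" // stands in for GetStableKey(pod, &container)

	now := time.Now()
	backOff.Next(key, now) // a previous failed start put this container into backoff

	fmt.Println(backOff.IsInBackOffSince(key, now)) // true: restarts are still delayed

	// The desired resources now differ from the allocated ones (e.g. the resize
	// patch was reverted), so the kubelet clears the pending delay for this key.
	backOff.Reset(key)

	fmt.Println(backOff.IsInBackOffSince(key, now)) // false: the next sync acts immediately
}
```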

pkg/kubelet/kubelet_test.go

@@ -2676,6 +2676,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
newRequestsAllocated bool // Whether the new requests have already been allocated (but not actuated)
expectedAllocations v1.ResourceList
expectedResize v1.PodResizeStatus
expectBackoffReset bool
goos string
}{
{
@@ -2684,6 +2685,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
expectBackoffReset: true,
},
{
name: "Request CPU increase, memory decrease - expect InProgress",
@@ -2691,6 +2693,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
expectBackoffReset: true,
},
{
name: "Request CPU decrease, memory increase - expect InProgress",
@@ -2698,6 +2701,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedResize: v1.PodResizeStatusInProgress,
expectBackoffReset: true,
},
{
name: "Request CPU and memory increase beyond current capacity - expect Deferred",
@@ -2788,6 +2792,11 @@ func TestHandlePodResourcesResize(t *testing.T) {
}
}
now := kubelet.clock.Now()
// Put the container in backoff so we can confirm backoff is reset.
backoffKey := kuberuntime.GetStableKey(tt.pod, &tt.pod.Spec.Containers[0])
kubelet.backOff.Next(backoffKey, now)
updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)
require.NoError(t, err)
assert.Equal(t, tt.expectedAllocations, updatedPod.Spec.Containers[0].Resources.Requests, "updated pod spec resources")
@@ -2798,6 +2807,13 @@ func TestHandlePodResourcesResize(t *testing.T) {
resizeStatus := kubelet.statusManager.GetPodResizeStatus(newPod.UID)
assert.Equal(t, tt.expectedResize, resizeStatus)
isInBackoff := kubelet.backOff.IsInBackOffSince(backoffKey, now)
if tt.expectBackoffReset {
assert.False(t, isInBackoff, "container backoff should be reset")
} else {
assert.True(t, isInBackoff, "container backoff should not be reset")
}
})
}
}

pkg/kubelet/kuberuntime/helpers.go

@@ -172,10 +172,10 @@ func isInitContainerFailed(status *kubecontainer.Status) bool {
return false
}
// getStableKey generates a key (string) to uniquely identify a
// GetStableKey generates a key (string) to uniquely identify a
// (pod, container) tuple. The key should include the content of the
// container, so that any change to the container generates a new key.
func getStableKey(pod *v1.Pod, container *v1.Container) string {
func GetStableKey(pod *v1.Pod, container *v1.Container) string {
hash := strconv.FormatUint(kubecontainer.HashContainer(container), 16)
return fmt.Sprintf("%s_%s_%s_%s_%s", pod.Name, pod.Namespace, string(pod.UID), container.Name, hash)
}
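
A hypothetical, much-simplified stand-in for this function (plain structs instead of v1.Pod and v1.Container, FNV instead of kubecontainer.HashContainer), just to illustrate the documented property that any change to the container's content, such as a resize, yields a different key:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

type Container struct {
	Name  string
	Image string
	CPU   string
}

type Pod struct {
	Name      string
	Namespace string
	UID       string
}

// stableKey mimics the shape of GetStableKey: pod identity plus a hash of the
// container's content, so editing the container changes the key.
func stableKey(pod Pod, c Container) string {
	h := fnv.New64a()
	fmt.Fprintf(h, "%s/%s/%s", c.Name, c.Image, c.CPU)
	return fmt.Sprintf("%s_%s_%s_%s_%x", pod.Name, pod.Namespace, pod.UID, c.Name, h.Sum64())
}

func main() {
	pod := Pod{Name: "mypod", Namespace: "default", UID: "uid-123"}
	c := Container{Name: "app", Image: "foo/image:v1", CPU: "500m"}
	oldKey := stableKey(pod, c)

	c.CPU = "1"                              // a resize edits the container content...
	fmt.Println(oldKey != stableKey(pod, c)) // ...so the key changes: true
}
```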

pkg/kubelet/kuberuntime/helpers_test.go

@@ -113,11 +113,11 @@ func TestStableKey(t *testing.T) {
Containers: []v1.Container{*container},
},
}
oldKey := getStableKey(pod, container)
oldKey := GetStableKey(pod, container)
// Updating the container image should change the key.
container.Image = "foo/image:v2"
newKey := getStableKey(pod, container)
newKey := GetStableKey(pod, container)
assert.NotEqual(t, oldKey, newKey)
}

pkg/kubelet/kuberuntime/kuberuntime_container.go

@@ -1433,7 +1433,7 @@ func (m *kubeGenericRuntimeManager) doBackOff(pod *v1.Pod, container *v1.Contain
// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
ts := cStatus.FinishedAt
// backOff requires a unique key to identify the container.
key := getStableKey(pod, container)
key := GetStableKey(pod, container)
if backOff.IsInBackOffSince(key, ts) {
if containerRef, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
m.recorder.Eventf(containerRef, v1.EventTypeWarning, events.BackOffStartContainer,
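
For context, doBackOff's use of this key follows a check-then-bump pattern: if the key is still inside its backoff window, measured from the last container exit, the start is skipped; otherwise the start proceeds and the window is advanced. A rough sketch of that pattern under those assumptions, with illustrative names rather than the kubelet's:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/flowcontrol"
)

// shouldDelayRestart reports whether a container start should be skipped
// because its key is still in backoff since finishedAt, the exit time of the
// previous instance; otherwise it records the attempt via Next.
func shouldDelayRestart(backOff *flowcontrol.Backoff, key string, finishedAt time.Time) bool {
	if backOff.IsInBackOffSince(key, finishedAt) {
		return true // still backing off: do not start the container yet
	}
	backOff.Next(key, finishedAt) // allow this start, widen the window for the next failure
	return false
}

func main() {
	backOff := flowcontrol.NewBackOff(10*time.Second, 5*time.Minute)
	finishedAt := time.Now()
	key := "mypod_default_uid-123_app_1a2b"

	fmt.Println(shouldDelayRestart(backOff, key, finishedAt)) // false: first attempt proceeds
	fmt.Println(shouldDelayRestart(backOff, key, finishedAt)) // true: a quick retry is delayed
}
```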