Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-21 10:51:29 +00:00)
Merge pull request #99212 from damemi/alculquicondor-log-timestamp
Logarithmic timestamp comparison for downscaling
This commit is contained in: commit bf448a1eaa
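In short: ActivePodsWithRanks gains a Now reference timestamp, and a new alpha feature gate, LogarithmicScaleDown (off by default), switches the ready-time and creation-time comparisons in Less to base-2 logarithmic ranks relative to Now; pods that land in the same rank are ordered pseudorandomly by UID.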
@@ -22,6 +22,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"hash/fnv"
+	"math"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -807,6 +808,10 @@ func (s ActivePods) Less(i, j int) bool {
 // 8. If the pods' creation times differ, the pod that was created more recently
 // comes before the older pod.
 //
+// In 6 and 8, times are compared in a logarithmic scale. This allows a level
+// of randomness among equivalent Pods when sorting. If two pods have the same
+// logarithmic rank, they are sorted by UUID to provide a pseudorandom order.
+//
 // If none of these rules matches, the second pod comes before the first pod.
 //
 // The intention of this ordering is to put pods that should be preferred for
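A worked illustration of the scale (numbers added here for clarity, not part of the change): ages are time.Duration values in nanoseconds, so a pod that became ready 2 hours ago gets rank int64(math.Log2(7.2e12)) = 42, while pods ready 5 and 8 hours ago both get rank 44; the latter two therefore compare as equivalent and fall through to the UID tie-break.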
@@ -819,6 +824,10 @@ type ActivePodsWithRanks struct {
 	// comparing two pods that are both scheduled, in the same phase, and
 	// having the same ready status.
 	Rank []int
+
+	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
+	// If zero, comparison happens without scaling.
+	Now metav1.Time
 }
 
 func (s ActivePodsWithRanks) Len() int {
@@ -872,7 +881,18 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
 		readyTime1 := podReadyTime(s.Pods[i])
 		readyTime2 := podReadyTime(s.Pods[j])
 		if !readyTime1.Equal(readyTime2) {
-			return afterOrZero(readyTime1, readyTime2)
+			if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
+				return afterOrZero(readyTime1, readyTime2)
+			} else {
+				if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
+					return afterOrZero(readyTime1, readyTime2)
+				}
+				rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
+				if rankDiff == 0 {
+					return s.Pods[i].UID < s.Pods[j].UID
+				}
+				return rankDiff < 0
+			}
 		}
 	}
 	// 7. Pods with containers with higher restart counts < lower restart counts
@@ -881,7 +901,18 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
 	}
 	// 8. Empty creation time pods < newer pods < older pods
 	if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
-		return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
+		if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
+			return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
+		} else {
+			if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
+				return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
+			}
+			rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
+			if rankDiff == 0 {
+				return s.Pods[i].UID < s.Pods[j].UID
+			}
+			return rankDiff < 0
+		}
 	}
 	return false
 }
@@ -895,6 +926,22 @@ func afterOrZero(t1, t2 *metav1.Time) bool {
 	return t1.After(t2.Time)
 }
 
+// logarithmicRankDiff calculates the base-2 logarithmic ranks of 2 timestamps,
+// compared to the current timestamp
+func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
+	d1 := now.Sub(t1.Time)
+	d2 := now.Sub(t2.Time)
+	r1 := int64(-1)
+	r2 := int64(-1)
+	if d1 > 0 {
+		r1 = int64(math.Log2(float64(d1)))
+	}
+	if d2 > 0 {
+		r2 = int64(math.Log2(float64(d2)))
+	}
+	return r1 - r2
+}
+
 func podReadyTime(pod *v1.Pod) *metav1.Time {
 	if podutil.IsPodReady(pod) {
 		for _, c := range pod.Status.Conditions {
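The new helper is easy to study in isolation. Below is a minimal standalone sketch (not part of this commit; the logRank name is invented here) that mirrors the rank computation in logarithmicRankDiff and shows which ages collapse into the same bucket:

package main

import (
	"fmt"
	"math"
	"time"
)

// logRank mirrors the per-timestamp rank used by logarithmicRankDiff above:
// the floor of the base-2 logarithm of a pod's age in nanoseconds,
// or -1 for a non-positive age.
func logRank(age time.Duration) int64 {
	if age <= 0 {
		return -1
	}
	return int64(math.Log2(float64(age)))
}

func main() {
	now := time.Now()
	for _, age := range []time.Duration{2 * time.Hour, 5 * time.Hour, 8 * time.Hour} {
		created := now.Add(-age)
		fmt.Printf("age %-8v rank %d\n", now.Sub(created), logRank(now.Sub(created)))
	}
	// Prints rank 42 for the 2h pod and rank 44 for both the 5h and 8h pods,
	// so the latter two would be tie-broken by UID.
}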
@@ -437,7 +437,10 @@ func TestSortingActivePods(t *testing.T) {
 
 func TestSortingActivePodsWithRanks(t *testing.T) {
 	now := metav1.Now()
-	then := metav1.Time{Time: now.AddDate(0, -1, 0)}
+	then1Month := metav1.Time{Time: now.AddDate(0, -1, 0)}
+	then2Hours := metav1.Time{Time: now.Add(-2 * time.Hour)}
+	then5Hours := metav1.Time{Time: now.Add(-5 * time.Hour)}
+	then8Hours := metav1.Time{Time: now.Add(-8 * time.Hour)}
 	zeroTime := metav1.Time{}
 	pod := func(podName, nodeName string, phase v1.PodPhase, ready bool, restarts int32, readySince metav1.Time, created metav1.Time, annotations map[string]string) *v1.Pod {
 		var conditions []v1.PodCondition
@@ -467,30 +470,46 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
 		runningNotReadyPod                  = pod("not-ready", "node", v1.PodRunning, false, 0, zeroTime, zeroTime, nil)
 		runningReadyNoLastTransitionTimePod = pod("ready-no-last-transition-time", "node", v1.PodRunning, true, 0, zeroTime, zeroTime, nil)
 		runningReadyNow                     = pod("ready-now", "node", v1.PodRunning, true, 0, now, now, nil)
-		runningReadyThen                    = pod("ready-then", "node", v1.PodRunning, true, 0, then, then, nil)
+		runningReadyThen                    = pod("ready-then", "node", v1.PodRunning, true, 0, then1Month, then1Month, nil)
 		runningReadyNowHighRestarts         = pod("ready-high-restarts", "node", v1.PodRunning, true, 9001, now, now, nil)
-		runningReadyNowCreatedThen          = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then, nil)
-		lowPodDeletionCost                  = pod("low-deletion-cost", "node", v1.PodRunning, true, 0, now, then, map[string]string{core.PodDeletionCost: "10"})
-		highPodDeletionCost                 = pod("high-deletion-cost", "node", v1.PodRunning, true, 0, now, then, map[string]string{core.PodDeletionCost: "100"})
+		runningReadyNowCreatedThen          = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then1Month, nil)
+		lowPodDeletionCost                  = pod("low-deletion-cost", "node", v1.PodRunning, true, 0, now, then1Month, map[string]string{core.PodDeletionCost: "10"})
+		highPodDeletionCost                 = pod("high-deletion-cost", "node", v1.PodRunning, true, 0, now, then1Month, map[string]string{core.PodDeletionCost: "100"})
+		unscheduled5Hours                   = pod("unscheduled-5-hours", "", v1.PodPending, false, 0, then5Hours, then5Hours, nil)
+		unscheduled8Hours                   = pod("unscheduled-10-hours", "", v1.PodPending, false, 0, then8Hours, then8Hours, nil)
+		ready2Hours                         = pod("ready-2-hours", "", v1.PodRunning, true, 0, then2Hours, then1Month, nil)
+		ready5Hours                         = pod("ready-5-hours", "", v1.PodRunning, true, 0, then5Hours, then1Month, nil)
+		ready10Hours                        = pod("ready-10-hours", "", v1.PodRunning, true, 0, then8Hours, then1Month, nil)
 	)
-	equalityTests := []*v1.Pod{
-		unscheduledPod,
-		scheduledPendingPod,
-		unknownPhasePod,
-		runningNotReadyPod,
-		runningReadyNowCreatedThen,
-		runningReadyNow,
-		runningReadyThen,
-		runningReadyNowHighRestarts,
-		runningReadyNowCreatedThen,
+	equalityTests := []struct {
+		p1                          *v1.Pod
+		p2                          *v1.Pod
+		disableLogarithmicScaleDown bool
+	}{
+		{p1: unscheduledPod},
+		{p1: scheduledPendingPod},
+		{p1: unknownPhasePod},
+		{p1: runningNotReadyPod},
+		{p1: runningReadyNowCreatedThen},
+		{p1: runningReadyNow},
+		{p1: runningReadyThen},
+		{p1: runningReadyNowHighRestarts},
+		{p1: runningReadyNowCreatedThen},
+		{p1: unscheduled5Hours, p2: unscheduled8Hours},
+		{p1: ready5Hours, p2: ready10Hours},
 	}
-	for _, pod := range equalityTests {
+	for _, tc := range equalityTests {
+		defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LogarithmicScaleDown, !tc.disableLogarithmicScaleDown)()
+		if tc.p2 == nil {
+			tc.p2 = tc.p1
+		}
 		podsWithRanks := ActivePodsWithRanks{
-			Pods: []*v1.Pod{pod, pod},
+			Pods: []*v1.Pod{tc.p1, tc.p2},
 			Rank: []int{1, 1},
+			Now:  now,
 		}
 		if podsWithRanks.Less(0, 1) || podsWithRanks.Less(1, 0) {
-			t.Errorf("expected pod %q not to be less than than itself", pod.Name)
+			t.Errorf("expected pod %q to be equivalent to %q", tc.p1.Name, tc.p2.Name)
 		}
 	}
 	type podWithRank struct {
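Note on the two new equality cases: unscheduled5Hours/unscheduled8Hours and ready5Hours/ready10Hours pair pods whose timestamps differ by hours yet share a logarithmic rank (both the 5-hour and 8-hour ages floor to rank 44 in nanoseconds), so rankDiff is 0 and, since these fixtures set no UIDs, neither pod sorts strictly before the other.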
@@ -498,8 +517,9 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
 		rank int
 	}
 	inequalityTests := []struct {
-		lesser, greater        podWithRank
-		disablePodDeletioncost bool
+		lesser, greater             podWithRank
+		disablePodDeletioncost      bool
+		disableLogarithmicScaleDown bool
 	}{
 		{lesser: podWithRank{unscheduledPod, 1}, greater: podWithRank{scheduledPendingPod, 2}},
 		{lesser: podWithRank{unscheduledPod, 2}, greater: podWithRank{scheduledPendingPod, 1}},
@@ -516,14 +536,17 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
 		{lesser: podWithRank{runningReadyNowCreatedThen, 2}, greater: podWithRank{runningReadyNow, 1}},
 		{lesser: podWithRank{lowPodDeletionCost, 2}, greater: podWithRank{highPodDeletionCost, 1}},
 		{lesser: podWithRank{highPodDeletionCost, 2}, greater: podWithRank{lowPodDeletionCost, 1}, disablePodDeletioncost: true},
+		{lesser: podWithRank{ready2Hours, 1}, greater: podWithRank{ready5Hours, 1}},
 	}
 	for i, test := range inequalityTests {
 		t.Run(fmt.Sprintf("test%d", i), func(t *testing.T) {
 			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDeletionCost, !test.disablePodDeletioncost)()
+			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LogarithmicScaleDown, !test.disableLogarithmicScaleDown)()
 
 			podsWithRanks := ActivePodsWithRanks{
 				Pods: []*v1.Pod{test.lesser.pod, test.greater.pod},
 				Rank: []int{test.lesser.rank, test.greater.rank},
+				Now:  now,
 			}
 			if !podsWithRanks.Less(0, 1) {
 				t.Errorf("expected pod %q with rank %v to be less than %q with rank %v", podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0], podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1])
@@ -822,7 +822,7 @@ func getPodsRankedByRelatedPodsOnSameNode(podsToRank, relatedPods []*v1.Pod) con
 	for i, pod := range podsToRank {
 		ranks[i] = podsOnNode[pod.Spec.NodeName]
 	}
-	return controller.ActivePodsWithRanks{Pods: podsToRank, Rank: ranks}
+	return controller.ActivePodsWithRanks{Pods: podsToRank, Rank: ranks, Now: metav1.Now()}
 }
 
 func getPodKeys(pods []*v1.Pod) []string {
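Populating Now at the point where the ranks are computed means every pairwise comparison in a single scale-down pass measures pod ages against the same reference instant, keeping the bucketing consistent across the whole sort.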
@@ -692,6 +692,12 @@ const (
 	//
 	// Enable support multiple Service "type: LoadBalancer" implementations in a cluster by specifying LoadBalancerClass
 	ServiceLoadBalancerClass featuregate.Feature = "ServiceLoadBalancerClass"
+
+	// owner: @damemi
+	// alpha: v1.21
+	//
+	// Enables scaling down replicas via logarithmic comparison of creation/ready timestamps
+	LogarithmicScaleDown featuregate.Feature = "LogarithmicScaleDown"
 )
 
 func init() {
@@ -797,6 +803,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
 	PodDeletionCost:              {Default: false, PreRelease: featuregate.Alpha},
 	PodAffinityNamespaceSelector: {Default: false, PreRelease: featuregate.Alpha},
 	ServiceLoadBalancerClass:     {Default: false, PreRelease: featuregate.Alpha},
+	LogarithmicScaleDown:         {Default: false, PreRelease: featuregate.Alpha},
 
 	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
 	// unintentionally on either side:
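Because the gate defaults to false, exercising this path on a real cluster would typically mean enabling it through the standard feature-gate mechanism on the kube-controller-manager, e.g. --feature-gates=LogarithmicScaleDown=true (a usage assumption; the flag plumbing itself is not part of this diff).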
@@ -25,19 +25,23 @@ import (
 	"time"
 
 	"k8s.io/api/core/v1"
+	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/client-go/informers"
 	clientset "k8s.io/client-go/kubernetes"
 	typedv1 "k8s.io/client-go/kubernetes/typed/core/v1"
 	restclient "k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/util/retry"
+	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller/replication"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/test/integration/framework"
 )
 
@@ -491,6 +495,44 @@ func TestSpecReplicasChange(t *testing.T) {
 	}
 }
 
+func TestLogarithmicScaleDown(t *testing.T) {
+	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LogarithmicScaleDown, true)()
+	s, closeFn, rm, informers, c := rmSetup(t)
+	defer closeFn()
+	ns := framework.CreateTestingNamespace("test-spec-replicas-change", s, t)
+	defer framework.DeleteTestingNamespace(ns, s, t)
+	stopCh := runControllerAndInformers(t, rm, informers, 0)
+	defer close(stopCh)
+
+	rc := newRC("rc", ns.Name, 2)
+	rcs, _ := createRCsPods(t, c, []*v1.ReplicationController{rc}, []*v1.Pod{})
+	rc = rcs[0]
+	waitRCStable(t, c, rc)
+
+	// get list of pods in the cluster
+	pods, err := c.CoreV1().Pods(ns.Name).List(context.TODO(), metav1.ListOptions{})
+	if err != nil {
+		t.Fatalf("failed to get pods in namespace %s: %+v", ns.Name, err)
+	}
+
+	// Wait 10 seconds and scale up, the new pod should be in a new logarithmic rank from the first 2
+	time.Sleep(10 * time.Second)
+	scaleRC(t, c, rc, 3)
+
+	// scale back down, and confirm that the pods left in the namespace are the original ones
+	// (meaning the 3rd one was deleted)
+	scaleRC(t, c, rc, 2)
+
+	newPods, err := c.CoreV1().Pods(ns.Name).List(context.TODO(), metav1.ListOptions{})
+	if err != nil {
+		t.Fatalf("failed to get pods in namespace %s: %+v", ns.Name, err)
+	}
+
+	if !apiequality.Semantic.DeepEqual(pods.Items, newPods.Items) {
+		t.Fatalf("expected pods %+v, got %+v", pods.Items, newPods.Items)
+	}
+}
+
 func TestDeletingAndFailedPods(t *testing.T) {
 	s, closeFn, rm, informers, c := rmSetup(t)
 	defer closeFn()
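Rough arithmetic behind the 10-second wait (illustrative, not from the test itself): a pod roughly 10 s old has creation rank int64(math.Log2(1e10)) = 33, while the pod created during the scale-up is only a second or two old and sits around rank 29 to 30, so it falls in a strictly lower bucket and is the one removed when scaling back down.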