Merge pull request #99212 from damemi/alculquicondor-log-timestamp

Logarithmic timestamp comparison for downscaling
Kubernetes Prow Robot 2021-03-06 09:47:41 -08:00 committed by GitHub
commit bf448a1eaa
5 changed files with 142 additions and 23 deletions
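In short: when scaling down, the replica controllers previously always preferred the most recently created or most recently ready pods. Behind the new LogarithmicScaleDown alpha feature gate, timestamps are instead compared on a base-2 logarithmic scale: pods whose ages fall into the same power-of-two bucket are treated as equivalent, and ties are broken pseudorandomly by UID, so deletions spread across replicas that were started at roughly the same time. A minimal standalone sketch of the bucketing (the logRank helper below is illustrative only, not code from this patch):

package main

import (
	"fmt"
	"math"
	"time"
)

// logRank buckets a pod's age into a base-2 logarithmic rank; a larger rank
// means an older bucket. Zero or future timestamps get the lowest rank.
func logRank(created, now time.Time) int64 {
	if d := now.Sub(created); d > 0 {
		return int64(math.Log2(float64(d)))
	}
	return -1
}

func main() {
	now := time.Now()
	// A pod started 10 seconds ago sits in a much lower bucket than a
	// month-old pod, so very recent replicas are still deleted first.
	fmt.Println(logRank(now.Add(-10*time.Second), now)) // 33
	fmt.Println(logRank(now.AddDate(0, -1, 0), now))    // 51
}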


@@ -22,6 +22,7 @@ import (
"encoding/json"
"fmt"
"hash/fnv"
"math"
"sync"
"sync/atomic"
"time"
@@ -807,6 +808,10 @@ func (s ActivePods) Less(i, j int) bool {
// 8. If the pods' creation times differ, the pod that was created more recently
// comes before the older pod.
//
// In 6 and 8, times are compared on a logarithmic scale. This allows a level
// of randomness among equivalent Pods when sorting. If two pods have the same
// logarithmic rank, they are sorted by UUID to provide a pseudorandom order.
//
// If none of these rules matches, the second pod comes before the first pod.
//
// The intention of this ordering is to put pods that should be preferred for
@@ -819,6 +824,10 @@ type ActivePodsWithRanks struct {
// comparing two pods that are both scheduled, in the same phase, and
// having the same ready status.
Rank []int
// Now is a reference timestamp for doing logarithmic timestamp comparisons.
// If zero, comparison happens without scaling.
Now metav1.Time
}
func (s ActivePodsWithRanks) Len() int {
@@ -872,7 +881,18 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
readyTime1 := podReadyTime(s.Pods[i])
readyTime2 := podReadyTime(s.Pods[j])
if !readyTime1.Equal(readyTime2) {
return afterOrZero(readyTime1, readyTime2)
if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
return afterOrZero(readyTime1, readyTime2)
} else {
if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
return afterOrZero(readyTime1, readyTime2)
}
rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
if rankDiff == 0 {
return s.Pods[i].UID < s.Pods[j].UID
}
return rankDiff < 0
}
}
}
// 7. Pods with containers with higher restart counts < lower restart counts
@@ -881,7 +901,18 @@ func (s ActivePodsWithRanks) Less(i, j int) bool {
}
// 8. Empty creation time pods < newer pods < older pods
if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
} else {
if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
}
rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
if rankDiff == 0 {
return s.Pods[i].UID < s.Pods[j].UID
}
return rankDiff < 0
}
}
return false
}
@@ -895,6 +926,22 @@ func afterOrZero(t1, t2 *metav1.Time) bool {
return t1.After(t2.Time)
}
// logarithmicRankDiff calculates the difference between the base-2 logarithmic
// ranks of 2 timestamps, each measured relative to the current timestamp
func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
d1 := now.Sub(t1.Time)
d2 := now.Sub(t2.Time)
r1 := int64(-1)
r2 := int64(-1)
if d1 > 0 {
r1 = int64(math.Log2(float64(d1)))
}
if d2 > 0 {
r2 = int64(math.Log2(float64(d2)))
}
return r1 - r2
}
func podReadyTime(pod *v1.Pod) *metav1.Time {
if podutil.IsPodReady(pod) {
for _, c := range pod.Status.Conditions {

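For orientation before the tests below: logarithmicRankDiff returns the difference between the two pods' base-2 age buckets relative to Now. A negative result means the first pod sits in a younger bucket and sorts earlier (so it is the preferred scale-down victim); zero means the pods share a bucket and Less falls back to the UID tie-break. A small standalone sketch using the same durations as the updated unit test (the rankDiff helper here is illustrative, not the controller code):

package main

import (
	"fmt"
	"math"
	"time"
)

// rankDiff mirrors logarithmicRankDiff for plain time.Time values.
func rankDiff(t1, t2, now time.Time) int64 {
	rank := func(t time.Time) int64 {
		if d := now.Sub(t); d > 0 {
			return int64(math.Log2(float64(d)))
		}
		return -1
	}
	return rank(t1) - rank(t2)
}

func main() {
	now := time.Now()
	// 2h old vs 5h old: buckets 42 vs 44, so the younger pod deterministically
	// sorts first (negative diff).
	fmt.Println(rankDiff(now.Add(-2*time.Hour), now.Add(-5*time.Hour), now)) // -2
	// 5h old vs 8h old: both land in bucket 44, so ordering falls back to the UID tie-break.
	fmt.Println(rankDiff(now.Add(-5*time.Hour), now.Add(-8*time.Hour), now)) // 0
}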

@@ -437,7 +437,10 @@ func TestSortingActivePods(t *testing.T) {
func TestSortingActivePodsWithRanks(t *testing.T) {
now := metav1.Now()
then := metav1.Time{Time: now.AddDate(0, -1, 0)}
then1Month := metav1.Time{Time: now.AddDate(0, -1, 0)}
then2Hours := metav1.Time{Time: now.Add(-2 * time.Hour)}
then5Hours := metav1.Time{Time: now.Add(-5 * time.Hour)}
then8Hours := metav1.Time{Time: now.Add(-8 * time.Hour)}
zeroTime := metav1.Time{}
pod := func(podName, nodeName string, phase v1.PodPhase, ready bool, restarts int32, readySince metav1.Time, created metav1.Time, annotations map[string]string) *v1.Pod {
var conditions []v1.PodCondition
@@ -467,30 +470,46 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
runningNotReadyPod = pod("not-ready", "node", v1.PodRunning, false, 0, zeroTime, zeroTime, nil)
runningReadyNoLastTransitionTimePod = pod("ready-no-last-transition-time", "node", v1.PodRunning, true, 0, zeroTime, zeroTime, nil)
runningReadyNow = pod("ready-now", "node", v1.PodRunning, true, 0, now, now, nil)
runningReadyThen = pod("ready-then", "node", v1.PodRunning, true, 0, then, then, nil)
runningReadyThen = pod("ready-then", "node", v1.PodRunning, true, 0, then1Month, then1Month, nil)
runningReadyNowHighRestarts = pod("ready-high-restarts", "node", v1.PodRunning, true, 9001, now, now, nil)
runningReadyNowCreatedThen = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then, nil)
lowPodDeletionCost = pod("low-deletion-cost", "node", v1.PodRunning, true, 0, now, then, map[string]string{core.PodDeletionCost: "10"})
highPodDeletionCost = pod("high-deletion-cost", "node", v1.PodRunning, true, 0, now, then, map[string]string{core.PodDeletionCost: "100"})
runningReadyNowCreatedThen = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then1Month, nil)
lowPodDeletionCost = pod("low-deletion-cost", "node", v1.PodRunning, true, 0, now, then1Month, map[string]string{core.PodDeletionCost: "10"})
highPodDeletionCost = pod("high-deletion-cost", "node", v1.PodRunning, true, 0, now, then1Month, map[string]string{core.PodDeletionCost: "100"})
unscheduled5Hours = pod("unscheduled-5-hours", "", v1.PodPending, false, 0, then5Hours, then5Hours, nil)
unscheduled8Hours = pod("unscheduled-10-hours", "", v1.PodPending, false, 0, then8Hours, then8Hours, nil)
ready2Hours = pod("ready-2-hours", "", v1.PodRunning, true, 0, then2Hours, then1Month, nil)
ready5Hours = pod("ready-5-hours", "", v1.PodRunning, true, 0, then5Hours, then1Month, nil)
ready10Hours = pod("ready-10-hours", "", v1.PodRunning, true, 0, then8Hours, then1Month, nil)
)
equalityTests := []*v1.Pod{
unscheduledPod,
scheduledPendingPod,
unknownPhasePod,
runningNotReadyPod,
runningReadyNowCreatedThen,
runningReadyNow,
runningReadyThen,
runningReadyNowHighRestarts,
runningReadyNowCreatedThen,
equalityTests := []struct {
p1 *v1.Pod
p2 *v1.Pod
disableLogarithmicScaleDown bool
}{
{p1: unscheduledPod},
{p1: scheduledPendingPod},
{p1: unknownPhasePod},
{p1: runningNotReadyPod},
{p1: runningReadyNowCreatedThen},
{p1: runningReadyNow},
{p1: runningReadyThen},
{p1: runningReadyNowHighRestarts},
{p1: runningReadyNowCreatedThen},
{p1: unscheduled5Hours, p2: unscheduled8Hours},
{p1: ready5Hours, p2: ready8Hours},
}
for _, pod := range equalityTests {
for _, tc := range equalityTests {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LogarithmicScaleDown, !tc.disableLogarithmicScaleDown)()
if tc.p2 == nil {
tc.p2 = tc.p1
}
podsWithRanks := ActivePodsWithRanks{
Pods: []*v1.Pod{pod, pod},
Pods: []*v1.Pod{tc.p1, tc.p2},
Rank: []int{1, 1},
Now: now,
}
if podsWithRanks.Less(0, 1) || podsWithRanks.Less(1, 0) {
t.Errorf("expected pod %q not to be less than than itself", pod.Name)
t.Errorf("expected pod %q to be equivalent to %q", tc.p1.Name, tc.p2.Name)
}
}
type podWithRank struct {
@@ -498,8 +517,9 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
rank int
}
inequalityTests := []struct {
lesser, greater podWithRank
disablePodDeletioncost bool
lesser, greater podWithRank
disablePodDeletioncost bool
disableLogarithmicScaleDown bool
}{
{lesser: podWithRank{unscheduledPod, 1}, greater: podWithRank{scheduledPendingPod, 2}},
{lesser: podWithRank{unscheduledPod, 2}, greater: podWithRank{scheduledPendingPod, 1}},
@@ -516,14 +536,17 @@ func TestSortingActivePodsWithRanks(t *testing.T) {
{lesser: podWithRank{runningReadyNowCreatedThen, 2}, greater: podWithRank{runningReadyNow, 1}},
{lesser: podWithRank{lowPodDeletionCost, 2}, greater: podWithRank{highPodDeletionCost, 1}},
{lesser: podWithRank{highPodDeletionCost, 2}, greater: podWithRank{lowPodDeletionCost, 1}, disablePodDeletioncost: true},
{lesser: podWithRank{ready2Hours, 1}, greater: podWithRank{ready5Hours, 1}},
}
for i, test := range inequalityTests {
t.Run(fmt.Sprintf("test%d", i), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDeletionCost, !test.disablePodDeletioncost)()
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LogarithmicScaleDown, !test.disableLogarithmicScaleDown)()
podsWithRanks := ActivePodsWithRanks{
Pods: []*v1.Pod{test.lesser.pod, test.greater.pod},
Rank: []int{test.lesser.rank, test.greater.rank},
Now: now,
}
if !podsWithRanks.Less(0, 1) {
t.Errorf("expected pod %q with rank %v to be less than %q with rank %v", podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0], podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1])


@@ -822,7 +822,7 @@ func getPodsRankedByRelatedPodsOnSameNode(podsToRank, relatedPods []*v1.Pod) controller.ActivePodsWithRanks {
for i, pod := range podsToRank {
ranks[i] = podsOnNode[pod.Spec.NodeName]
}
return controller.ActivePodsWithRanks{Pods: podsToRank, Rank: ranks}
return controller.ActivePodsWithRanks{Pods: podsToRank, Rank: ranks, Now: metav1.Now()}
}
func getPodKeys(pods []*v1.Pod) []string {

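The controller seeds the ranking with Now so the comparator can do the logarithmic math. As a rough illustration of how such a ranked list is consumed, sorting it puts the preferred scale-down victims first; the sketch below is hypothetical (the mkPod helper is invented for the example, and it assumes compilation inside the kubernetes repo so that k8s.io/kubernetes/pkg/controller is importable):

package main

import (
	"fmt"
	"sort"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/pkg/controller"
)

func main() {
	now := metav1.Now()
	mkPod := func(name string, age time.Duration) *v1.Pod {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:              name,
				CreationTimestamp: metav1.NewTime(now.Add(-age)),
			},
			Status: v1.PodStatus{Phase: v1.PodRunning},
		}
	}
	podsWithRanks := controller.ActivePodsWithRanks{
		Pods: []*v1.Pod{mkPod("old", 30 * 24 * time.Hour), mkPod("new", 10 * time.Second)},
		Rank: []int{1, 1},
		Now:  now,
	}
	sort.Sort(podsWithRanks)
	// The newer pod sorts first and is therefore the preferred scale-down victim.
	fmt.Println(podsWithRanks.Pods[0].Name) // "new"
}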

@@ -692,6 +692,12 @@ const (
//
// Enable support multiple Service "type: LoadBalancer" implementations in a cluster by specifying LoadBalancerClass
ServiceLoadBalancerClass featuregate.Feature = "ServiceLoadBalancerClass"
// owner: @damemi
// alpha: v1.21
//
// Enables scaling down replicas via logarithmic comparison of creation/ready timestamps
LogarithmicScaleDown featuregate.Feature = "LogarithmicScaleDown"
)
func init() {
@@ -797,6 +803,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
PodDeletionCost: {Default: false, PreRelease: featuregate.Alpha},
PodAffinityNamespaceSelector: {Default: false, PreRelease: featuregate.Alpha},
ServiceLoadBalancerClass: {Default: false, PreRelease: featuregate.Alpha},
LogarithmicScaleDown: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:


@@ -25,19 +25,23 @@ import (
"time"
"k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
typedv1 "k8s.io/client-go/kubernetes/typed/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/retry"
featuregatetesting "k8s.io/component-base/featuregate/testing"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/controller/replication"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/integration/framework"
)
@@ -491,6 +495,44 @@ func TestSpecReplicasChange(t *testing.T) {
}
}
func TestLogarithmicScaleDown(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LogarithmicScaleDown, true)()
s, closeFn, rm, informers, c := rmSetup(t)
defer closeFn()
ns := framework.CreateTestingNamespace("test-spec-replicas-change", s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
stopCh := runControllerAndInformers(t, rm, informers, 0)
defer close(stopCh)
rc := newRC("rc", ns.Name, 2)
rcs, _ := createRCsPods(t, c, []*v1.ReplicationController{rc}, []*v1.Pod{})
rc = rcs[0]
waitRCStable(t, c, rc)
// get list of pods in the cluster
pods, err := c.CoreV1().Pods(ns.Name).List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatalf("failed to get pods in namespace %s: %+v", ns.Name, err)
}
// Wait 10 seconds and scale up; the new pod should land in a different logarithmic rank than the first 2
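// (Roughly, assuming the timings here: a ~10-second-old pod has rank int(log2(10s in ns)) = 33,
// while a pod created only a second or two before the scale-down ranks around 29-31, so the
// newest pod falls in a lower bucket and is always the victim.)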
time.Sleep(10 * time.Second)
scaleRC(t, c, rc, 3)
// scale back down, and confirm that the pods left in the namespace are the original ones
// (meaning the 3rd one was deleted)
scaleRC(t, c, rc, 2)
newPods, err := c.CoreV1().Pods(ns.Name).List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatalf("failed to get pods in namespace %s: %+v", ns.Name, err)
}
if !apiequality.Semantic.DeepEqual(pods.Items, newPods.Items) {
t.Fatalf("expected pods %+v, got %+v", pods.Items, newPods.Items)
}
}
func TestDeletingAndFailedPods(t *testing.T) {
s, closeFn, rm, informers, c := rmSetup(t)
defer closeFn()