Merge pull request #83665 from mars1024/feat/shuffle-sharding

shuffle sharding package for priority and fairness
2025-08-05 10:19:50 +00:00 · 2019-10-23 09:48:28 -07:00 · 2019-10-23 09:48:28 -07:00 · cd274ff270
commit cd274ff270
parent b5a2abfda7 7a3ca070cd
4 changed files with 455 additions and 0 deletions
--- a/staging/src/k8s.io/apiserver/BUILD
+++ b/staging/src/k8s.io/apiserver/BUILD
@ -44,6 +44,7 @@ filegroup(
        "//staging/src/k8s.io/apiserver/pkg/util/flushwriter:all-srcs",
        "//staging/src/k8s.io/apiserver/pkg/util/openapi:all-srcs",
        "//staging/src/k8s.io/apiserver/pkg/util/proxy:all-srcs",
+        "//staging/src/k8s.io/apiserver/pkg/util/shufflesharding:all-srcs",
        "//staging/src/k8s.io/apiserver/pkg/util/term:all-srcs",
        "//staging/src/k8s.io/apiserver/pkg/util/webhook:all-srcs",
        "//staging/src/k8s.io/apiserver/pkg/util/wsstream:all-srcs",
--- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/BUILD
+++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/BUILD
@ -0,0 +1,29 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["shufflesharding.go"],
+    importmap = "k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/util/shufflesharding",
+    importpath = "k8s.io/apiserver/pkg/util/shufflesharding",
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = ["shufflesharding_test.go"],
+    embed = [":go_default_library"],
+)
+
+filegroup(
+    name = "package-srcs",
+    srcs = glob(["**"]),
+    tags = ["automanaged"],
+    visibility = ["//visibility:private"],
+)
+
+filegroup(
+    name = "all-srcs",
+    srcs = [":package-srcs"],
+    tags = ["automanaged"],
+    visibility = ["//visibility:public"],
+)
--- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go
@ -0,0 +1,107 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package shufflesharding
+
+import (
+	"fmt"
+	"math"
+)
+
+// MaxHashBits is the max bit length which can be used from hash value.
+// If we use all bits of hash value, the critical(last) card shuffled by
+// Dealer will be uneven to 2:3 (first half:second half) at most,
+// in order to reduce this unevenness to 32:33, we set MaxHashBits to 60 here.
+const MaxHashBits = 60
+
+// RequiredEntropyBits makes a quick and slightly conservative estimate of the number
+// of bits of hash value that are consumed in shuffle sharding a deck of the given size
+// to a hand of the given size.  The result is meaningful only if
+// 1 <= handSize <= deckSize <= 1<<26.
+func RequiredEntropyBits(deckSize, handSize int) int {
+	return int(math.Ceil(math.Log2(float64(deckSize)) * float64(handSize)))
+}
+
+// Dealer contains some necessary parameters and provides some methods for shuffle sharding.
+// Dealer is thread-safe.
+type Dealer struct {
+	deckSize int
+	handSize int
+}
+
+// NewDealer will create a Dealer with the given deckSize and handSize, will return error when
+// deckSize or handSize is invalid as below.
+// 1. deckSize or handSize is not positive
+// 2. handSize is greater than deckSize
+// 3. deckSize is impractically large (greater than 1<<26)
+// 4. required entropy bits of deckSize and handSize is greater than MaxHashBits
+func NewDealer(deckSize, handSize int) (*Dealer, error) {
+	if deckSize <= 0 || handSize <= 0 {
+		return nil, fmt.Errorf("deckSize %d or handSize %d is not positive", deckSize, handSize)
+	}
+	if handSize > deckSize {
+		return nil, fmt.Errorf("handSize %d is greater than deckSize %d", handSize, deckSize)
+	}
+	if deckSize > 1<<26 {
+		return nil, fmt.Errorf("deckSize %d is impractically large", deckSize)
+	}
+	if RequiredEntropyBits(deckSize, handSize) > MaxHashBits {
+		return nil, fmt.Errorf("required entropy bits of deckSize %d and handSize %d is greater than %d", deckSize, handSize, MaxHashBits)
+	}
+
+	return &Dealer{
+		deckSize: deckSize,
+		handSize: handSize,
+	}, nil
+}
+
+// Deal shuffles a card deck and deals a hand of cards, using the given hashValue as the source of entropy.
+// The deck size and hand size are properties of the Dealer.
+// This function synchronously makes sequential calls to pick, one for each dealt card.
+// Each card is identified by an integer in the range [0, deckSize).
+// For example, for deckSize=128 and handSize=4 this function might call pick(14); pick(73); pick(119); pick(26).
+func (d *Dealer) Deal(hashValue uint64, pick func(int)) {
+	// 15 is the largest possible value of handSize
+	var remainders [15]int
+
+	for i := 0; i < d.handSize; i++ {
+		hashValueNext := hashValue / uint64(d.deckSize-i)
+		remainders[i] = int(hashValue - uint64(d.deckSize-i)*hashValueNext)
+		hashValue = hashValueNext
+	}
+
+	for i := 0; i < d.handSize; i++ {
+		card := remainders[i]
+		for j := i; j > 0; j-- {
+			if card >= remainders[j-1] {
+				card++
+			}
+		}
+		pick(card)
+	}
+}
+
+// DealIntoHand shuffles and deals according to the Dealer's parameters,
+// using the given hashValue as the source of entropy and then
+// returns the dealt cards as a slice of `int`.
+// If `hand` has the correct length as Dealer's handSize, it will be used as-is and no allocations will be made.
+// If `hand` is nil or too small, it will be extended (performing an allocation).
+// If `hand` is too large, a sub-slice will be returned.
+func (d *Dealer) DealIntoHand(hashValue uint64, hand []int) []int {
+	h := hand[:0]
+	d.Deal(hashValue, func(card int) { h = append(h, card) })
+	return h
+}
--- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go
@ -0,0 +1,318 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package shufflesharding
+
+import (
+	"fmt"
+	"math"
+	"math/rand"
+	"reflect"
+	"sort"
+	"testing"
+)
+
+func TestRequiredEntropyBits(t *testing.T) {
+	tests := []struct {
+		name         string
+		deckSize     int
+		handSize     int
+		expectedBits int
+	}{
+		{
+			"deckSize: 1024 handSize: 6",
+			1024,
+			6,
+			60,
+		},
+		{
+			"deckSize: 512 handSize: 8",
+			512,
+			8,
+			72,
+		},
+	}
+
+	for _, test := range tests {
+		bits := RequiredEntropyBits(test.deckSize, test.handSize)
+		if bits != test.expectedBits {
+			t.Errorf("test %s fails: expected %v but got %v", test.name, test.expectedBits, bits)
+			return
+		}
+	}
+}
+
+func TestNewDealer(t *testing.T) {
+	tests := []struct {
+		name     string
+		deckSize int
+		handSize int
+		err      error
+	}{
+		{
+			"deckSize <= 0",
+			-100,
+			8,
+			fmt.Errorf("deckSize -100 or handSize 8 is not positive"),
+		},
+		{
+			"handSize <= 0",
+			100,
+			0,
+			fmt.Errorf("deckSize 100 or handSize 0 is not positive"),
+		},
+		{
+			"handSize is greater than deckSize",
+			100,
+			101,
+			fmt.Errorf("handSize 101 is greater than deckSize 100"),
+		},
+		{
+			"deckSize is impractically large",
+			1 << 27,
+			2,
+			fmt.Errorf("deckSize 134217728 is impractically large"),
+		},
+		{
+			"required entropy bits is greater than MaxHashBits",
+			512,
+			8,
+			fmt.Errorf("required entropy bits of deckSize 512 and handSize 8 is greater than 60"),
+		},
+		{
+			"deckSize: 1024 handSize: 6",
+			1024,
+			6,
+			nil,
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			_, err := NewDealer(test.deckSize, test.handSize)
+			if !reflect.DeepEqual(err, test.err) {
+				t.Errorf("test %s fails: expected %v but got %v", test.name, test.err, err)
+				return
+			}
+		})
+	}
+}
+
+func TestCardDuplication(t *testing.T) {
+	tests := []struct {
+		name     string
+		deckSize int
+		handSize int
+	}{
+		{
+			"deckSize = handSize = 4",
+			4,
+			4,
+		},
+		{
+			"deckSize = handSize = 8",
+			8,
+			8,
+		},
+		{
+			"deckSize = handSize = 10",
+			10,
+			10,
+		},
+		{
+			"deckSize = handSize = 12",
+			12,
+			12,
+		},
+		{
+			"deckSize = 128, handSize = 8",
+			128,
+			8,
+		},
+		{
+			"deckSize = 256, handSize = 7",
+			256,
+			7,
+		},
+		{
+			"deckSize = 512, handSize = 6",
+			512,
+			6,
+		},
+	}
+	for _, test := range tests {
+		hashValue := rand.Uint64()
+		t.Run(test.name, func(t *testing.T) {
+			dealer, err := NewDealer(test.deckSize, test.handSize)
+			if err != nil {
+				t.Errorf("fail to create Dealer: %v", err)
+				return
+			}
+			hand := make([]int, 0)
+			hand = dealer.DealIntoHand(hashValue, hand)
+
+			// check cards number
+			if len(hand) != int(test.handSize) {
+				t.Errorf("test case %s fails in cards number", test.name)
+				return
+			}
+
+			// check cards range and duplication
+			cardMap := make(map[int]struct{})
+			for _, card := range hand {
+				if card < 0 || card >= int(test.deckSize) {
+					t.Errorf("test case %s fails in range check", test.name)
+					return
+				}
+				cardMap[card] = struct{}{}
+			}
+			if len(cardMap) != int(test.handSize) {
+				t.Errorf("test case %s fails in duplication check", test.name)
+				return
+			}
+
+		})
+	}
+}
+
+// ff computes the falling factorial `n!/(n-m)!` and requires n to be
+// positive and m to be in the range [0, n] and requires the answer to
+// fit in an int
+func ff(n, m int) int {
+	ans := 1
+	for f := n; f > n-m; f-- {
+		ans *= f
+	}
+	return ans
+}
+
+func TestUniformDistribution(t *testing.T) {
+	const spare = 64 - MaxHashBits
+	tests := []struct {
+		deckSize, handSize int
+		hashMax            int
+	}{
+		{64, 3, 1 << uint(math.Ceil(math.Log2(float64(ff(64, 3))))+spare)},
+		{128, 3, ff(128, 3)},
+		{128, 3, 3 * ff(128, 3)},
+		{70, 4, ff(70, 4)},
+	}
+	for _, test := range tests {
+		dealer, err := NewDealer(test.deckSize, test.handSize)
+		if err != nil {
+			t.Errorf("fail to create Dealer: %v", err)
+			return
+		}
+		handCoordinateMap := make(map[int]int) // maps coded hand to count of times seen
+
+		fallingFactorial := ff(test.deckSize, test.handSize)
+		permutations := ff(test.handSize, test.handSize)
+		allCoordinateCount := fallingFactorial / permutations
+		nff := float64(test.hashMax) / float64(fallingFactorial)
+		minCount := permutations * int(math.Floor(nff))
+		maxCount := permutations * int(math.Ceil(nff))
+		aHand := make([]int, test.handSize)
+		for i := 0; i < test.hashMax; i++ {
+			aHand = dealer.DealIntoHand(uint64(i), aHand)
+			sort.IntSlice(aHand).Sort()
+			handCoordinate := 0
+			for _, card := range aHand {
+				handCoordinate = handCoordinate<<7 + card
+			}
+			handCoordinateMap[handCoordinate]++
+		}
+
+		t.Logf("Deck size = %v, hand size = %v, number of possible hands = %d, number of hands seen = %d, number of deals = %d, expected count range = [%v, %v]", test.deckSize, test.handSize, allCoordinateCount, len(handCoordinateMap), test.hashMax, minCount, maxCount)
+
+		// histogram maps (count of times a hand is seen) to (number of hands having that count)
+		histogram := make(map[int]int)
+		for _, count := range handCoordinateMap {
+			histogram[count] = histogram[count] + 1
+		}
+
+		var goodSum int
+		for count := minCount; count <= maxCount; count++ {
+			goodSum += histogram[count]
+		}
+
+		goodPct := 100 * float64(goodSum) / float64(allCoordinateCount)
+
+		t.Logf("good percentage = %v, histogram = %v", goodPct, histogram)
+		if goodSum != allCoordinateCount {
+			t.Errorf("Only %v percent of the hands got a central count", goodPct)
+		}
+	}
+	return
+}
+
+func TestDealer_DealIntoHand(t *testing.T) {
+	dealer, _ := NewDealer(6, 6)
+
+	tests := []struct {
+		name         string
+		hand         []int
+		expectedSize int
+	}{
+		{
+			"nil slice",
+			nil,
+			6,
+		},
+		{
+			"empty slice",
+			make([]int, 0),
+			6,
+		},
+		{
+			"size: 6 cap: 6 slice",
+			make([]int, 6, 6),
+			6,
+		},
+		{
+			"size: 6 cap: 12 slice",
+			make([]int, 6, 12),
+			6,
+		},
+		{
+			"size: 4 cap: 4 slice",
+			make([]int, 4, 4),
+			6,
+		},
+		{
+			"size: 4 cap: 12 slice",
+			make([]int, 4, 12),
+			6,
+		},
+		{
+			"size: 10 cap: 10 slice",
+			make([]int, 10, 10),
+			6,
+		},
+		{
+			"size: 10 cap: 12 slice",
+			make([]int, 10, 12),
+			6,
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			h := dealer.DealIntoHand(0, test.hand)
+			if len(h) != test.expectedSize {
+				t.Errorf("test %s fails: expetced size %d but got %d", test.name, test.expectedSize, len(h))
+				return
+			}
+		})
+	}
+}