mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 23:15:14 +00:00
More work on shuffle sharding utils
Changes following up on shuffle sharding util package. Made the validation checking function return a slice of error messages rather than just a bit. Replaced all the `int32` with `int` because this is intended for more than just the priority-and-faireness feature and so should not be a slave to its configuration datatypes. Introduced ShuffleAndDealIntoHand, to make memory allocation the caller's problem/privilege. Made the hand uniformity tester avoid reflection, evaluate the histogram against the expected range of counts, and run multiple test cases, including one in which the number of hash values is a power of two with four extra bits (as the validation check requires) and one in which the deck size is not a power of two.
This commit is contained in:
parent
e97eaef4f6
commit
da0b647155
@ -18,20 +18,34 @@ package shufflesharding
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const maxHashBits = 60
|
||||
|
||||
// ValidateParameters can validate parameters for shuffle sharding
|
||||
// in a fast but approximate way, including deckSize and handSize
|
||||
// Algorithm: maxHashBits >= bits(deckSize^handSize)
|
||||
func ValidateParameters(deckSize, handSize int32) bool {
|
||||
if handSize <= 0 || deckSize <= 0 || handSize > deckSize {
|
||||
return false
|
||||
// ValidateParameters finds errors in the parameters for shuffle
|
||||
// sharding. Returns a slice for which `len()` is 0 if and only if
|
||||
// there are no errors. The entropy requirement is evaluated in a
|
||||
// fast but approximate way: bits(deckSize^handSize).
|
||||
func ValidateParameters(deckSize, handSize int) (errs []string) {
|
||||
if handSize <= 0 {
|
||||
errs = append(errs, "handSize is not positive")
|
||||
}
|
||||
|
||||
return math.Log2(float64(deckSize))*float64(handSize) <= maxHashBits
|
||||
if deckSize <= 0 {
|
||||
errs = append(errs, "deckSize is not positive")
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
return
|
||||
}
|
||||
if handSize > deckSize {
|
||||
return []string{"handSize is greater than deckSize"}
|
||||
}
|
||||
if math.Log2(float64(deckSize))*float64(handSize) > maxHashBits {
|
||||
return []string{fmt.Sprintf("more than %d bits of entropy required", maxHashBits)}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ShuffleAndDeal can shuffle a hash value to handSize-quantity and non-redundant
|
||||
@ -39,16 +53,16 @@ func ValidateParameters(deckSize, handSize int32) bool {
|
||||
// Eg. From deckSize=128, handSize=8, we can get an index array [12 14 73 18 119 51 117 26],
|
||||
// then pick function will choose the optimal index from these
|
||||
// Algorithm: https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/20190228-priority-and-fairness.md#queue-assignment-proof-of-concept
|
||||
func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32)) {
|
||||
remainders := make([]int32, handSize)
|
||||
func ShuffleAndDeal(hashValue uint64, deckSize, handSize int, pick func(int)) {
|
||||
remainders := make([]int, handSize)
|
||||
|
||||
for i := int32(0); i < handSize; i++ {
|
||||
for i := 0; i < handSize; i++ {
|
||||
hashValueNext := hashValue / uint64(deckSize-i)
|
||||
remainders[i] = int32(hashValue - uint64(deckSize-i)*hashValueNext)
|
||||
remainders[i] = int(hashValue - uint64(deckSize-i)*hashValueNext)
|
||||
hashValue = hashValueNext
|
||||
}
|
||||
|
||||
for i := int32(0); i < handSize; i++ {
|
||||
for i := 0; i < handSize; i++ {
|
||||
candidate := remainders[i]
|
||||
for j := i; j > 0; j-- {
|
||||
if candidate >= remainders[j-1] {
|
||||
@ -60,9 +74,9 @@ func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32)
|
||||
}
|
||||
|
||||
// ShuffleAndDealWithValidation will do validation before ShuffleAndDeal
|
||||
func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pick func(int32)) error {
|
||||
if !ValidateParameters(deckSize, handSize) {
|
||||
return errors.New("bad parameters")
|
||||
func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int, pick func(int)) error {
|
||||
if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 {
|
||||
return errors.New(strings.Join(errs, ";"))
|
||||
}
|
||||
|
||||
ShuffleAndDeal(hashValue, deckSize, handSize, pick)
|
||||
@ -71,14 +85,14 @@ func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pi
|
||||
|
||||
// ShuffleAndDealToSlice will use specific pick function to return slices of indices
|
||||
// after ShuffleAndDeal
|
||||
func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 {
|
||||
func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int) []int {
|
||||
var (
|
||||
candidates = make([]int32, handSize)
|
||||
candidates = make([]int, handSize)
|
||||
idx = 0
|
||||
)
|
||||
|
||||
pickToSlices := func(can int32) {
|
||||
candidates[idx] = can
|
||||
pickToSlices := func(can int) {
|
||||
candidates[idx] = int(can)
|
||||
idx++
|
||||
}
|
||||
|
||||
@ -87,11 +101,33 @@ func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 {
|
||||
return candidates
|
||||
}
|
||||
|
||||
// ShuffleAndDealIntoHand shuffles a deck of the given size by the
|
||||
// given hash value and deals cards into the given slice. The virtue
|
||||
// of this function compared to ShuffleAndDealToSlice is that the
|
||||
// caller provides the storage for the hand.
|
||||
func ShuffleAndDealIntoHand(hashValue uint64, deckSize int, hand []int) {
|
||||
handSize := len(hand)
|
||||
var idx int
|
||||
ShuffleAndDeal(hashValue, deckSize, handSize, func(card int) {
|
||||
hand[idx] = int(card)
|
||||
idx++
|
||||
})
|
||||
}
|
||||
|
||||
// ShuffleAndDealToSliceWithValidation will do validation before ShuffleAndDealToSlice
|
||||
func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int32) ([]int32, error) {
|
||||
if !ValidateParameters(deckSize, handSize) {
|
||||
return nil, errors.New("bad parameters")
|
||||
func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int) ([]int, error) {
|
||||
if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 {
|
||||
return nil, errors.New(strings.Join(errs, ";"))
|
||||
}
|
||||
|
||||
return ShuffleAndDealToSlice(hashValue, deckSize, handSize), nil
|
||||
}
|
||||
|
||||
// ShuffleAndDealIntoHandWithValidation does validation and then ShuffleAndDealIntoHand
|
||||
func ShuffleAndDealIntoHandWithValidation(hashValue uint64, deckSize int, hand []int) error {
|
||||
if errs := ValidateParameters(deckSize, len(hand)); len(errs) > 0 {
|
||||
return errors.New(strings.Join(errs, ";"))
|
||||
}
|
||||
ShuffleAndDealIntoHand(hashValue, deckSize, hand)
|
||||
return nil
|
||||
}
|
||||
|
@ -20,69 +20,72 @@ import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestValidateParameters(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
deckSize int32
|
||||
handSize int32
|
||||
validated bool
|
||||
name string
|
||||
deckSize int
|
||||
handSize int
|
||||
errors []string
|
||||
}{
|
||||
{
|
||||
"deckSize is < 0",
|
||||
-100,
|
||||
8,
|
||||
false,
|
||||
[]string{"deckSize is not positive"},
|
||||
},
|
||||
{
|
||||
"handSize is < 0",
|
||||
128,
|
||||
-100,
|
||||
false,
|
||||
[]string{"handSize is not positive"},
|
||||
},
|
||||
{
|
||||
"deckSize is 0",
|
||||
0,
|
||||
8,
|
||||
false,
|
||||
[]string{"deckSize is not positive"},
|
||||
},
|
||||
{
|
||||
"handSize is 0",
|
||||
128,
|
||||
0,
|
||||
false,
|
||||
[]string{"handSize is not positive"},
|
||||
},
|
||||
{
|
||||
"handSize is greater than deckSize",
|
||||
128,
|
||||
129,
|
||||
false,
|
||||
[]string{"handSize is greater than deckSize"},
|
||||
},
|
||||
{
|
||||
"deckSize: 128 handSize: 6",
|
||||
128,
|
||||
6,
|
||||
true,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"deckSize: 1024 handSize: 6",
|
||||
1024,
|
||||
6,
|
||||
true,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"deckSize: 512 handSize: 8",
|
||||
512,
|
||||
8,
|
||||
false,
|
||||
[]string{"more than 60 bits of entropy required"},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
if ValidateParameters(test.deckSize, test.handSize) != test.validated {
|
||||
t.Errorf("test case %s fails", test.name)
|
||||
got := strings.Join(ValidateParameters(test.deckSize, test.handSize), ";")
|
||||
expected := strings.Join(test.errors, ";")
|
||||
if got != expected {
|
||||
t.Errorf("test case %s got %q but expected %q", test.name, got, expected)
|
||||
return
|
||||
}
|
||||
})
|
||||
@ -90,7 +93,7 @@ func TestValidateParameters(t *testing.T) {
|
||||
}
|
||||
|
||||
func BenchmarkValidateParameters(b *testing.B) {
|
||||
deckSize, handSize := int32(512), int32(8)
|
||||
deckSize, handSize := 512, 8
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ValidateParameters(deckSize, handSize)
|
||||
}
|
||||
@ -99,65 +102,65 @@ func BenchmarkValidateParameters(b *testing.B) {
|
||||
func TestShuffleAndDealWithValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
deckSize int32
|
||||
handSize int32
|
||||
pick func(int32)
|
||||
deckSize int
|
||||
handSize int
|
||||
pick func(int)
|
||||
validated bool
|
||||
}{
|
||||
{
|
||||
"deckSize is < 0",
|
||||
-100,
|
||||
8,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"handSize is < 0",
|
||||
128,
|
||||
-100,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"deckSize is 0",
|
||||
0,
|
||||
8,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"handSize is 0",
|
||||
128,
|
||||
0,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"handSize is greater than deckSize",
|
||||
128,
|
||||
129,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"deckSize: 128 handSize: 6",
|
||||
128,
|
||||
6,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"deckSize: 1024 handSize: 6",
|
||||
1024,
|
||||
6,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"deckSize: 512 handSize: 8",
|
||||
512,
|
||||
8,
|
||||
func(int32) {},
|
||||
func(int) {},
|
||||
false,
|
||||
},
|
||||
}
|
||||
@ -173,8 +176,8 @@ func TestShuffleAndDealWithValidation(t *testing.T) {
|
||||
|
||||
func BenchmarkShuffleAndDeal(b *testing.B) {
|
||||
hashValueBase := math.MaxUint64 / uint64(b.N)
|
||||
deckSize, handSize := int32(512), int32(8)
|
||||
pick := func(int32) {}
|
||||
deckSize, handSize := 512, 8
|
||||
pick := func(int) {}
|
||||
for i := 0; i < b.N; i++ {
|
||||
ShuffleAndDeal(hashValueBase*uint64(i), deckSize, handSize, pick)
|
||||
}
|
||||
@ -183,8 +186,8 @@ func BenchmarkShuffleAndDeal(b *testing.B) {
|
||||
func TestShuffleAndDealToSliceWithValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
deckSize int32
|
||||
handSize int32
|
||||
deckSize int
|
||||
handSize int
|
||||
validated bool
|
||||
}{
|
||||
{
|
||||
@ -253,13 +256,13 @@ func TestShuffleAndDealToSliceWithValidation(t *testing.T) {
|
||||
}
|
||||
|
||||
// check cards range and duplication
|
||||
cardMap := make(map[int32]struct{})
|
||||
for _, cardIdx := range cards {
|
||||
if cardIdx < 0 || cardIdx >= test.deckSize {
|
||||
cardMap := make(map[int]struct{})
|
||||
for _, card := range cards {
|
||||
if card < 0 || card >= int(test.deckSize) {
|
||||
t.Errorf("test case %s fails in range check", test.name)
|
||||
return
|
||||
}
|
||||
cardMap[cardIdx] = struct{}{}
|
||||
cardMap[card] = struct{}{}
|
||||
}
|
||||
if len(cardMap) != int(test.handSize) {
|
||||
t.Errorf("test case %s fails in duplication check", test.name)
|
||||
@ -272,33 +275,73 @@ func TestShuffleAndDealToSliceWithValidation(t *testing.T) {
|
||||
|
||||
func BenchmarkShuffleAndDealToSlice(b *testing.B) {
|
||||
hashValueBase := math.MaxUint64 / uint64(b.N)
|
||||
deckSize, handSize := int32(512), int32(8)
|
||||
deckSize, handSize := 512, 8
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ShuffleAndDealToSlice(hashValueBase*uint64(i), deckSize, handSize)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUniformDistribution(t *testing.T) {
|
||||
deckSize, handSize := int32(128), int32(3)
|
||||
handCoordinateMap := make(map[int]int)
|
||||
|
||||
allCoordinateCount := 128 * 127 * 126 / 6
|
||||
|
||||
for i := 0; i < allCoordinateCount*16; i++ {
|
||||
hands := ShuffleAndDealToSlice(rand.Uint64(), deckSize, handSize)
|
||||
sort.Slice(hands, func(i, j int) bool {
|
||||
return hands[i] < hands[j]
|
||||
})
|
||||
handCoordinate := 0
|
||||
for _, hand := range hands {
|
||||
handCoordinate = handCoordinate<<7 + int(hand)
|
||||
}
|
||||
handCoordinateMap[handCoordinate]++
|
||||
// ff computes the falling factorial `n!/(n-m)!` and requires n to be
|
||||
// positive and m to be in the range [0, n] and requires the answer to
|
||||
// fit in an int
|
||||
func ff(n, m int) int {
|
||||
ans := 1
|
||||
for f := n; f > n-m; f-- {
|
||||
ans *= f
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
// TODO: check uniform distribution
|
||||
t.Logf("%d", len(handCoordinateMap))
|
||||
t.Logf("%d", allCoordinateCount)
|
||||
func TestUniformDistribution(t *testing.T) {
|
||||
const spare = 64 - maxHashBits
|
||||
tests := []struct {
|
||||
deckSize, handSize int
|
||||
hashMax int
|
||||
}{
|
||||
{64, 3, 1 << uint(math.Ceil(math.Log2(float64(ff(64, 3))))+spare)},
|
||||
{128, 3, ff(128, 3)},
|
||||
{128, 3, 3 * ff(128, 3)},
|
||||
{70, 4, ff(70, 4)},
|
||||
}
|
||||
for _, test := range tests {
|
||||
handCoordinateMap := make(map[int]int) // maps coded hand to count of times seen
|
||||
|
||||
fallingFactorial := ff(test.deckSize, test.handSize)
|
||||
permutations := ff(test.handSize, test.handSize)
|
||||
allCoordinateCount := fallingFactorial / permutations
|
||||
nff := float64(test.hashMax) / float64(fallingFactorial)
|
||||
minCount := permutations * int(math.Floor(nff))
|
||||
maxCount := permutations * int(math.Ceil(nff))
|
||||
aHand := make([]int, test.handSize)
|
||||
for i := 0; i < test.hashMax; i++ {
|
||||
ShuffleAndDealIntoHand(uint64(i), test.deckSize, aHand)
|
||||
sort.IntSlice(aHand).Sort()
|
||||
handCoordinate := 0
|
||||
for _, card := range aHand {
|
||||
handCoordinate = handCoordinate<<7 + card
|
||||
}
|
||||
handCoordinateMap[handCoordinate]++
|
||||
}
|
||||
|
||||
t.Logf("Deck size = %v, hand size = %v, number of possible hands = %d, number of hands seen = %d, number of deals = %d, expected count range = [%v, %v]", test.deckSize, test.handSize, allCoordinateCount, len(handCoordinateMap), test.hashMax, minCount, maxCount)
|
||||
|
||||
// histogram maps (count of times a hand is seen) to (number of hands having that count)
|
||||
histogram := make(map[int]int)
|
||||
for _, count := range handCoordinateMap {
|
||||
histogram[count] = histogram[count] + 1
|
||||
}
|
||||
|
||||
var goodSum int
|
||||
for count := minCount; count <= maxCount; count++ {
|
||||
goodSum += histogram[count]
|
||||
}
|
||||
|
||||
goodPct := 100 * float64(goodSum) / float64(allCoordinateCount)
|
||||
|
||||
t.Logf("good percentage = %v, histogram = %v", goodPct, histogram)
|
||||
if goodSum != allCoordinateCount {
|
||||
t.Errorf("Only %v percent of the hands got a central count", goodPct)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user