More work on shuffle sharding utils

Changes following up on shuffle sharding util package.

Made the validation checking function return a slice of error messages
rather than just a bit.

Replaced all the `int32` with `int` because this is intended for more
than just the priority-and-faireness feature and so should not be a
slave to its configuration datatypes.

Introduced ShuffleAndDealIntoHand, to make memory allocation the
caller's problem/privilege.

Made the hand uniformity tester avoid reflection, evaluate the
histogram against the expected range of counts, and run multiple test
cases, including one in which the number of hash values is a power of
two with four extra bits (as the validation check requires) and one in
which the deck size is not a power of two.
This commit is contained in:
Mike Spreitzer 2019-08-08 14:53:40 -04:00 committed by Bruce Ma
parent e97eaef4f6
commit da0b647155
2 changed files with 156 additions and 77 deletions

View File

@ -18,20 +18,34 @@ package shufflesharding
import (
"errors"
"fmt"
"math"
"strings"
)
const maxHashBits = 60
// ValidateParameters can validate parameters for shuffle sharding
// in a fast but approximate way, including deckSize and handSize
// Algorithm: maxHashBits >= bits(deckSize^handSize)
func ValidateParameters(deckSize, handSize int32) bool {
if handSize <= 0 || deckSize <= 0 || handSize > deckSize {
return false
// ValidateParameters finds errors in the parameters for shuffle
// sharding. Returns a slice for which `len()` is 0 if and only if
// there are no errors. The entropy requirement is evaluated in a
// fast but approximate way: bits(deckSize^handSize).
func ValidateParameters(deckSize, handSize int) (errs []string) {
if handSize <= 0 {
errs = append(errs, "handSize is not positive")
}
return math.Log2(float64(deckSize))*float64(handSize) <= maxHashBits
if deckSize <= 0 {
errs = append(errs, "deckSize is not positive")
}
if len(errs) > 0 {
return
}
if handSize > deckSize {
return []string{"handSize is greater than deckSize"}
}
if math.Log2(float64(deckSize))*float64(handSize) > maxHashBits {
return []string{fmt.Sprintf("more than %d bits of entropy required", maxHashBits)}
}
return
}
// ShuffleAndDeal can shuffle a hash value to handSize-quantity and non-redundant
@ -39,16 +53,16 @@ func ValidateParameters(deckSize, handSize int32) bool {
// Eg. From deckSize=128, handSize=8, we can get an index array [12 14 73 18 119 51 117 26],
// then pick function will choose the optimal index from these
// Algorithm: https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/20190228-priority-and-fairness.md#queue-assignment-proof-of-concept
func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32)) {
remainders := make([]int32, handSize)
func ShuffleAndDeal(hashValue uint64, deckSize, handSize int, pick func(int)) {
remainders := make([]int, handSize)
for i := int32(0); i < handSize; i++ {
for i := 0; i < handSize; i++ {
hashValueNext := hashValue / uint64(deckSize-i)
remainders[i] = int32(hashValue - uint64(deckSize-i)*hashValueNext)
remainders[i] = int(hashValue - uint64(deckSize-i)*hashValueNext)
hashValue = hashValueNext
}
for i := int32(0); i < handSize; i++ {
for i := 0; i < handSize; i++ {
candidate := remainders[i]
for j := i; j > 0; j-- {
if candidate >= remainders[j-1] {
@ -60,9 +74,9 @@ func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32)
}
// ShuffleAndDealWithValidation will do validation before ShuffleAndDeal
func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pick func(int32)) error {
if !ValidateParameters(deckSize, handSize) {
return errors.New("bad parameters")
func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int, pick func(int)) error {
if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 {
return errors.New(strings.Join(errs, ";"))
}
ShuffleAndDeal(hashValue, deckSize, handSize, pick)
@ -71,14 +85,14 @@ func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pi
// ShuffleAndDealToSlice will use specific pick function to return slices of indices
// after ShuffleAndDeal
func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 {
func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int) []int {
var (
candidates = make([]int32, handSize)
candidates = make([]int, handSize)
idx = 0
)
pickToSlices := func(can int32) {
candidates[idx] = can
pickToSlices := func(can int) {
candidates[idx] = int(can)
idx++
}
@ -87,11 +101,33 @@ func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 {
return candidates
}
// ShuffleAndDealIntoHand shuffles a deck of the given size by the
// given hash value and deals cards into the given slice. The virtue
// of this function compared to ShuffleAndDealToSlice is that the
// caller provides the storage for the hand.
func ShuffleAndDealIntoHand(hashValue uint64, deckSize int, hand []int) {
handSize := len(hand)
var idx int
ShuffleAndDeal(hashValue, deckSize, handSize, func(card int) {
hand[idx] = int(card)
idx++
})
}
// ShuffleAndDealToSliceWithValidation will do validation before ShuffleAndDealToSlice
func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int32) ([]int32, error) {
if !ValidateParameters(deckSize, handSize) {
return nil, errors.New("bad parameters")
func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int) ([]int, error) {
if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 {
return nil, errors.New(strings.Join(errs, ";"))
}
return ShuffleAndDealToSlice(hashValue, deckSize, handSize), nil
}
// ShuffleAndDealIntoHandWithValidation does validation and then ShuffleAndDealIntoHand
func ShuffleAndDealIntoHandWithValidation(hashValue uint64, deckSize int, hand []int) error {
if errs := ValidateParameters(deckSize, len(hand)); len(errs) > 0 {
return errors.New(strings.Join(errs, ";"))
}
ShuffleAndDealIntoHand(hashValue, deckSize, hand)
return nil
}

View File

@ -20,69 +20,72 @@ import (
"math"
"math/rand"
"sort"
"strings"
"testing"
)
func TestValidateParameters(t *testing.T) {
tests := []struct {
name string
deckSize int32
handSize int32
validated bool
name string
deckSize int
handSize int
errors []string
}{
{
"deckSize is < 0",
-100,
8,
false,
[]string{"deckSize is not positive"},
},
{
"handSize is < 0",
128,
-100,
false,
[]string{"handSize is not positive"},
},
{
"deckSize is 0",
0,
8,
false,
[]string{"deckSize is not positive"},
},
{
"handSize is 0",
128,
0,
false,
[]string{"handSize is not positive"},
},
{
"handSize is greater than deckSize",
128,
129,
false,
[]string{"handSize is greater than deckSize"},
},
{
"deckSize: 128 handSize: 6",
128,
6,
true,
nil,
},
{
"deckSize: 1024 handSize: 6",
1024,
6,
true,
nil,
},
{
"deckSize: 512 handSize: 8",
512,
8,
false,
[]string{"more than 60 bits of entropy required"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if ValidateParameters(test.deckSize, test.handSize) != test.validated {
t.Errorf("test case %s fails", test.name)
got := strings.Join(ValidateParameters(test.deckSize, test.handSize), ";")
expected := strings.Join(test.errors, ";")
if got != expected {
t.Errorf("test case %s got %q but expected %q", test.name, got, expected)
return
}
})
@ -90,7 +93,7 @@ func TestValidateParameters(t *testing.T) {
}
func BenchmarkValidateParameters(b *testing.B) {
deckSize, handSize := int32(512), int32(8)
deckSize, handSize := 512, 8
for i := 0; i < b.N; i++ {
_ = ValidateParameters(deckSize, handSize)
}
@ -99,65 +102,65 @@ func BenchmarkValidateParameters(b *testing.B) {
func TestShuffleAndDealWithValidation(t *testing.T) {
tests := []struct {
name string
deckSize int32
handSize int32
pick func(int32)
deckSize int
handSize int
pick func(int)
validated bool
}{
{
"deckSize is < 0",
-100,
8,
func(int32) {},
func(int) {},
false,
},
{
"handSize is < 0",
128,
-100,
func(int32) {},
func(int) {},
false,
},
{
"deckSize is 0",
0,
8,
func(int32) {},
func(int) {},
false,
},
{
"handSize is 0",
128,
0,
func(int32) {},
func(int) {},
false,
},
{
"handSize is greater than deckSize",
128,
129,
func(int32) {},
func(int) {},
false,
},
{
"deckSize: 128 handSize: 6",
128,
6,
func(int32) {},
func(int) {},
true,
},
{
"deckSize: 1024 handSize: 6",
1024,
6,
func(int32) {},
func(int) {},
true,
},
{
"deckSize: 512 handSize: 8",
512,
8,
func(int32) {},
func(int) {},
false,
},
}
@ -173,8 +176,8 @@ func TestShuffleAndDealWithValidation(t *testing.T) {
func BenchmarkShuffleAndDeal(b *testing.B) {
hashValueBase := math.MaxUint64 / uint64(b.N)
deckSize, handSize := int32(512), int32(8)
pick := func(int32) {}
deckSize, handSize := 512, 8
pick := func(int) {}
for i := 0; i < b.N; i++ {
ShuffleAndDeal(hashValueBase*uint64(i), deckSize, handSize, pick)
}
@ -183,8 +186,8 @@ func BenchmarkShuffleAndDeal(b *testing.B) {
func TestShuffleAndDealToSliceWithValidation(t *testing.T) {
tests := []struct {
name string
deckSize int32
handSize int32
deckSize int
handSize int
validated bool
}{
{
@ -253,13 +256,13 @@ func TestShuffleAndDealToSliceWithValidation(t *testing.T) {
}
// check cards range and duplication
cardMap := make(map[int32]struct{})
for _, cardIdx := range cards {
if cardIdx < 0 || cardIdx >= test.deckSize {
cardMap := make(map[int]struct{})
for _, card := range cards {
if card < 0 || card >= int(test.deckSize) {
t.Errorf("test case %s fails in range check", test.name)
return
}
cardMap[cardIdx] = struct{}{}
cardMap[card] = struct{}{}
}
if len(cardMap) != int(test.handSize) {
t.Errorf("test case %s fails in duplication check", test.name)
@ -272,33 +275,73 @@ func TestShuffleAndDealToSliceWithValidation(t *testing.T) {
func BenchmarkShuffleAndDealToSlice(b *testing.B) {
hashValueBase := math.MaxUint64 / uint64(b.N)
deckSize, handSize := int32(512), int32(8)
deckSize, handSize := 512, 8
for i := 0; i < b.N; i++ {
_ = ShuffleAndDealToSlice(hashValueBase*uint64(i), deckSize, handSize)
}
}
func TestUniformDistribution(t *testing.T) {
deckSize, handSize := int32(128), int32(3)
handCoordinateMap := make(map[int]int)
allCoordinateCount := 128 * 127 * 126 / 6
for i := 0; i < allCoordinateCount*16; i++ {
hands := ShuffleAndDealToSlice(rand.Uint64(), deckSize, handSize)
sort.Slice(hands, func(i, j int) bool {
return hands[i] < hands[j]
})
handCoordinate := 0
for _, hand := range hands {
handCoordinate = handCoordinate<<7 + int(hand)
}
handCoordinateMap[handCoordinate]++
// ff computes the falling factorial `n!/(n-m)!` and requires n to be
// positive and m to be in the range [0, n] and requires the answer to
// fit in an int
func ff(n, m int) int {
ans := 1
for f := n; f > n-m; f-- {
ans *= f
}
return ans
}
// TODO: check uniform distribution
t.Logf("%d", len(handCoordinateMap))
t.Logf("%d", allCoordinateCount)
func TestUniformDistribution(t *testing.T) {
const spare = 64 - maxHashBits
tests := []struct {
deckSize, handSize int
hashMax int
}{
{64, 3, 1 << uint(math.Ceil(math.Log2(float64(ff(64, 3))))+spare)},
{128, 3, ff(128, 3)},
{128, 3, 3 * ff(128, 3)},
{70, 4, ff(70, 4)},
}
for _, test := range tests {
handCoordinateMap := make(map[int]int) // maps coded hand to count of times seen
fallingFactorial := ff(test.deckSize, test.handSize)
permutations := ff(test.handSize, test.handSize)
allCoordinateCount := fallingFactorial / permutations
nff := float64(test.hashMax) / float64(fallingFactorial)
minCount := permutations * int(math.Floor(nff))
maxCount := permutations * int(math.Ceil(nff))
aHand := make([]int, test.handSize)
for i := 0; i < test.hashMax; i++ {
ShuffleAndDealIntoHand(uint64(i), test.deckSize, aHand)
sort.IntSlice(aHand).Sort()
handCoordinate := 0
for _, card := range aHand {
handCoordinate = handCoordinate<<7 + card
}
handCoordinateMap[handCoordinate]++
}
t.Logf("Deck size = %v, hand size = %v, number of possible hands = %d, number of hands seen = %d, number of deals = %d, expected count range = [%v, %v]", test.deckSize, test.handSize, allCoordinateCount, len(handCoordinateMap), test.hashMax, minCount, maxCount)
// histogram maps (count of times a hand is seen) to (number of hands having that count)
histogram := make(map[int]int)
for _, count := range handCoordinateMap {
histogram[count] = histogram[count] + 1
}
var goodSum int
for count := minCount; count <= maxCount; count++ {
goodSum += histogram[count]
}
goodPct := 100 * float64(goodSum) / float64(allCoordinateCount)
t.Logf("good percentage = %v, histogram = %v", goodPct, histogram)
if goodSum != allCoordinateCount {
t.Errorf("Only %v percent of the hands got a central count", goodPct)
}
}
return
}