From e97eaef4f65ec2cbfebf4fd9e726c9e2a6bf5499 Mon Sep 17 00:00:00 2001 From: Bruce Ma Date: Mon, 29 Jul 2019 18:01:08 +0800 Subject: [PATCH 1/3] Add shuffle sharding utils and tests Implement several shuffle sharding functions including ShuffleAndDeal, ShuffleAndDealToSlice. Add benchmarks and tests for shuffle sharding to test performance, correctness and distribution uniformity. Signed-off-by: Bruce Ma --- staging/src/k8s.io/apiserver/BUILD | 1 + .../apiserver/pkg/util/shufflesharding/BUILD | 29 ++ .../util/shufflesharding/shufflesharding.go | 97 ++++++ .../shufflesharding/shufflesharding_test.go | 304 ++++++++++++++++++ 4 files changed, 431 insertions(+) create mode 100644 staging/src/k8s.io/apiserver/pkg/util/shufflesharding/BUILD create mode 100644 staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go create mode 100644 staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go diff --git a/staging/src/k8s.io/apiserver/BUILD b/staging/src/k8s.io/apiserver/BUILD index 9b457fb28ba..6946f785c2c 100644 --- a/staging/src/k8s.io/apiserver/BUILD +++ b/staging/src/k8s.io/apiserver/BUILD @@ -44,6 +44,7 @@ filegroup( "//staging/src/k8s.io/apiserver/pkg/util/flushwriter:all-srcs", "//staging/src/k8s.io/apiserver/pkg/util/openapi:all-srcs", "//staging/src/k8s.io/apiserver/pkg/util/proxy:all-srcs", + "//staging/src/k8s.io/apiserver/pkg/util/shufflesharding:all-srcs", "//staging/src/k8s.io/apiserver/pkg/util/term:all-srcs", "//staging/src/k8s.io/apiserver/pkg/util/webhook:all-srcs", "//staging/src/k8s.io/apiserver/pkg/util/wsstream:all-srcs", diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/BUILD b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/BUILD new file mode 100644 index 00000000000..8508afe3ddb --- /dev/null +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/BUILD @@ -0,0 +1,29 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = ["shufflesharding.go"], + importmap = "k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/util/shufflesharding", + importpath = "k8s.io/apiserver/pkg/util/shufflesharding", + visibility = ["//visibility:public"], +) + +go_test( + name = "go_default_test", + srcs = ["shufflesharding_test.go"], + embed = [":go_default_library"], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go new file mode 100644 index 00000000000..a95d791f6e7 --- /dev/null +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go @@ -0,0 +1,97 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package shufflesharding + +import ( + "errors" + "math" +) + +const maxHashBits = 60 + +// ValidateParameters can validate parameters for shuffle sharding +// in a fast but approximate way, including deckSize and handSize +// Algorithm: maxHashBits >= bits(deckSize^handSize) +func ValidateParameters(deckSize, handSize int32) bool { + if handSize <= 0 || deckSize <= 0 || handSize > deckSize { + return false + } + + return math.Log2(float64(deckSize))*float64(handSize) <= maxHashBits +} + +// ShuffleAndDeal can shuffle a hash value to handSize-quantity and non-redundant +// indices of decks, with the pick function, we can get the optimal deck index +// Eg. From deckSize=128, handSize=8, we can get an index array [12 14 73 18 119 51 117 26], +// then pick function will choose the optimal index from these +// Algorithm: https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/20190228-priority-and-fairness.md#queue-assignment-proof-of-concept +func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32)) { + remainders := make([]int32, handSize) + + for i := int32(0); i < handSize; i++ { + hashValueNext := hashValue / uint64(deckSize-i) + remainders[i] = int32(hashValue - uint64(deckSize-i)*hashValueNext) + hashValue = hashValueNext + } + + for i := int32(0); i < handSize; i++ { + candidate := remainders[i] + for j := i; j > 0; j-- { + if candidate >= remainders[j-1] { + candidate++ + } + } + pick(candidate) + } +} + +// ShuffleAndDealWithValidation will do validation before ShuffleAndDeal +func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pick func(int32)) error { + if !ValidateParameters(deckSize, handSize) { + return errors.New("bad parameters") + } + + ShuffleAndDeal(hashValue, deckSize, handSize, pick) + return nil +} + +// ShuffleAndDealToSlice will use specific pick function to return slices of indices +// after ShuffleAndDeal +func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 { + var ( + candidates = make([]int32, handSize) + idx = 0 + ) + + pickToSlices := func(can int32) { + candidates[idx] = can + idx++ + } + + ShuffleAndDeal(hashValue, deckSize, handSize, pickToSlices) + + return candidates +} + +// ShuffleAndDealToSliceWithValidation will do validation before ShuffleAndDealToSlice +func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int32) ([]int32, error) { + if !ValidateParameters(deckSize, handSize) { + return nil, errors.New("bad parameters") + } + + return ShuffleAndDealToSlice(hashValue, deckSize, handSize), nil +} diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go new file mode 100644 index 00000000000..0f6529c3f02 --- /dev/null +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go @@ -0,0 +1,304 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package shufflesharding + +import ( + "math" + "math/rand" + "sort" + "testing" +) + +func TestValidateParameters(t *testing.T) { + tests := []struct { + name string + deckSize int32 + handSize int32 + validated bool + }{ + { + "deckSize is < 0", + -100, + 8, + false, + }, + { + "handSize is < 0", + 128, + -100, + false, + }, + { + "deckSize is 0", + 0, + 8, + false, + }, + { + "handSize is 0", + 128, + 0, + false, + }, + { + "handSize is greater than deckSize", + 128, + 129, + false, + }, + { + "deckSize: 128 handSize: 6", + 128, + 6, + true, + }, + { + "deckSize: 1024 handSize: 6", + 1024, + 6, + true, + }, + { + "deckSize: 512 handSize: 8", + 512, + 8, + false, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if ValidateParameters(test.deckSize, test.handSize) != test.validated { + t.Errorf("test case %s fails", test.name) + return + } + }) + } +} + +func BenchmarkValidateParameters(b *testing.B) { + deckSize, handSize := int32(512), int32(8) + for i := 0; i < b.N; i++ { + _ = ValidateParameters(deckSize, handSize) + } +} + +func TestShuffleAndDealWithValidation(t *testing.T) { + tests := []struct { + name string + deckSize int32 + handSize int32 + pick func(int32) + validated bool + }{ + { + "deckSize is < 0", + -100, + 8, + func(int32) {}, + false, + }, + { + "handSize is < 0", + 128, + -100, + func(int32) {}, + false, + }, + { + "deckSize is 0", + 0, + 8, + func(int32) {}, + false, + }, + { + "handSize is 0", + 128, + 0, + func(int32) {}, + false, + }, + { + "handSize is greater than deckSize", + 128, + 129, + func(int32) {}, + false, + }, + { + "deckSize: 128 handSize: 6", + 128, + 6, + func(int32) {}, + true, + }, + { + "deckSize: 1024 handSize: 6", + 1024, + 6, + func(int32) {}, + true, + }, + { + "deckSize: 512 handSize: 8", + 512, + 8, + func(int32) {}, + false, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if (ShuffleAndDealWithValidation(rand.Uint64(), test.deckSize, test.handSize, test.pick) == nil) != test.validated { + t.Errorf("test case %s fails", test.name) + return + } + }) + } +} + +func BenchmarkShuffleAndDeal(b *testing.B) { + hashValueBase := math.MaxUint64 / uint64(b.N) + deckSize, handSize := int32(512), int32(8) + pick := func(int32) {} + for i := 0; i < b.N; i++ { + ShuffleAndDeal(hashValueBase*uint64(i), deckSize, handSize, pick) + } +} + +func TestShuffleAndDealToSliceWithValidation(t *testing.T) { + tests := []struct { + name string + deckSize int32 + handSize int32 + validated bool + }{ + { + "validation fails", + -100, + -100, + false, + }, + { + "deckSize = handSize = 4", + 4, + 4, + true, + }, + { + "deckSize = handSize = 8", + 8, + 8, + true, + }, + { + "deckSize = handSize = 10", + 10, + 10, + true, + }, + { + "deckSize = handSize = 12", + 12, + 12, + true, + }, + { + "deckSize = 128, handSize = 8", + 128, + 8, + true, + }, + { + "deckSize = 256, handSize = 7", + 256, + 7, + true, + }, + { + "deckSize = 512, handSize = 6", + 512, + 6, + true, + }, + } + for _, test := range tests { + hashValue := rand.Uint64() + t.Run(test.name, func(t *testing.T) { + cards, err := ShuffleAndDealToSliceWithValidation(hashValue, test.deckSize, test.handSize) + if (err == nil) != test.validated { + t.Errorf("test case %s fails in validation check", test.name) + return + } + + if test.validated { + // check cards number + if len(cards) != int(test.handSize) { + t.Errorf("test case %s fails in cards number", test.name) + return + } + + // check cards range and duplication + cardMap := make(map[int32]struct{}) + for _, cardIdx := range cards { + if cardIdx < 0 || cardIdx >= test.deckSize { + t.Errorf("test case %s fails in range check", test.name) + return + } + cardMap[cardIdx] = struct{}{} + } + if len(cardMap) != int(test.handSize) { + t.Errorf("test case %s fails in duplication check", test.name) + return + } + } + }) + } +} + +func BenchmarkShuffleAndDealToSlice(b *testing.B) { + hashValueBase := math.MaxUint64 / uint64(b.N) + deckSize, handSize := int32(512), int32(8) + for i := 0; i < b.N; i++ { + _ = ShuffleAndDealToSlice(hashValueBase*uint64(i), deckSize, handSize) + } +} + +func TestUniformDistribution(t *testing.T) { + deckSize, handSize := int32(128), int32(3) + handCoordinateMap := make(map[int]int) + + allCoordinateCount := 128 * 127 * 126 / 6 + + for i := 0; i < allCoordinateCount*16; i++ { + hands := ShuffleAndDealToSlice(rand.Uint64(), deckSize, handSize) + sort.Slice(hands, func(i, j int) bool { + return hands[i] < hands[j] + }) + handCoordinate := 0 + for _, hand := range hands { + handCoordinate = handCoordinate<<7 + int(hand) + } + handCoordinateMap[handCoordinate]++ + } + + // TODO: check uniform distribution + t.Logf("%d", len(handCoordinateMap)) + t.Logf("%d", allCoordinateCount) + + return +} From da0b647155912c6b1e6b971aa5685768915d810d Mon Sep 17 00:00:00 2001 From: Mike Spreitzer Date: Thu, 8 Aug 2019 14:53:40 -0400 Subject: [PATCH 2/3] More work on shuffle sharding utils Changes following up on shuffle sharding util package. Made the validation checking function return a slice of error messages rather than just a bit. Replaced all the `int32` with `int` because this is intended for more than just the priority-and-faireness feature and so should not be a slave to its configuration datatypes. Introduced ShuffleAndDealIntoHand, to make memory allocation the caller's problem/privilege. Made the hand uniformity tester avoid reflection, evaluate the histogram against the expected range of counts, and run multiple test cases, including one in which the number of hash values is a power of two with four extra bits (as the validation check requires) and one in which the deck size is not a power of two. --- .../util/shufflesharding/shufflesharding.go | 82 +++++++--- .../shufflesharding/shufflesharding_test.go | 151 +++++++++++------- 2 files changed, 156 insertions(+), 77 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go index a95d791f6e7..2767b2f0b87 100644 --- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go @@ -18,20 +18,34 @@ package shufflesharding import ( "errors" + "fmt" "math" + "strings" ) const maxHashBits = 60 -// ValidateParameters can validate parameters for shuffle sharding -// in a fast but approximate way, including deckSize and handSize -// Algorithm: maxHashBits >= bits(deckSize^handSize) -func ValidateParameters(deckSize, handSize int32) bool { - if handSize <= 0 || deckSize <= 0 || handSize > deckSize { - return false +// ValidateParameters finds errors in the parameters for shuffle +// sharding. Returns a slice for which `len()` is 0 if and only if +// there are no errors. The entropy requirement is evaluated in a +// fast but approximate way: bits(deckSize^handSize). +func ValidateParameters(deckSize, handSize int) (errs []string) { + if handSize <= 0 { + errs = append(errs, "handSize is not positive") } - - return math.Log2(float64(deckSize))*float64(handSize) <= maxHashBits + if deckSize <= 0 { + errs = append(errs, "deckSize is not positive") + } + if len(errs) > 0 { + return + } + if handSize > deckSize { + return []string{"handSize is greater than deckSize"} + } + if math.Log2(float64(deckSize))*float64(handSize) > maxHashBits { + return []string{fmt.Sprintf("more than %d bits of entropy required", maxHashBits)} + } + return } // ShuffleAndDeal can shuffle a hash value to handSize-quantity and non-redundant @@ -39,16 +53,16 @@ func ValidateParameters(deckSize, handSize int32) bool { // Eg. From deckSize=128, handSize=8, we can get an index array [12 14 73 18 119 51 117 26], // then pick function will choose the optimal index from these // Algorithm: https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/20190228-priority-and-fairness.md#queue-assignment-proof-of-concept -func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32)) { - remainders := make([]int32, handSize) +func ShuffleAndDeal(hashValue uint64, deckSize, handSize int, pick func(int)) { + remainders := make([]int, handSize) - for i := int32(0); i < handSize; i++ { + for i := 0; i < handSize; i++ { hashValueNext := hashValue / uint64(deckSize-i) - remainders[i] = int32(hashValue - uint64(deckSize-i)*hashValueNext) + remainders[i] = int(hashValue - uint64(deckSize-i)*hashValueNext) hashValue = hashValueNext } - for i := int32(0); i < handSize; i++ { + for i := 0; i < handSize; i++ { candidate := remainders[i] for j := i; j > 0; j-- { if candidate >= remainders[j-1] { @@ -60,9 +74,9 @@ func ShuffleAndDeal(hashValue uint64, deckSize, handSize int32, pick func(int32) } // ShuffleAndDealWithValidation will do validation before ShuffleAndDeal -func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pick func(int32)) error { - if !ValidateParameters(deckSize, handSize) { - return errors.New("bad parameters") +func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int, pick func(int)) error { + if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 { + return errors.New(strings.Join(errs, ";")) } ShuffleAndDeal(hashValue, deckSize, handSize, pick) @@ -71,14 +85,14 @@ func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int32, pi // ShuffleAndDealToSlice will use specific pick function to return slices of indices // after ShuffleAndDeal -func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 { +func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int) []int { var ( - candidates = make([]int32, handSize) + candidates = make([]int, handSize) idx = 0 ) - pickToSlices := func(can int32) { - candidates[idx] = can + pickToSlices := func(can int) { + candidates[idx] = int(can) idx++ } @@ -87,11 +101,33 @@ func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int32) []int32 { return candidates } +// ShuffleAndDealIntoHand shuffles a deck of the given size by the +// given hash value and deals cards into the given slice. The virtue +// of this function compared to ShuffleAndDealToSlice is that the +// caller provides the storage for the hand. +func ShuffleAndDealIntoHand(hashValue uint64, deckSize int, hand []int) { + handSize := len(hand) + var idx int + ShuffleAndDeal(hashValue, deckSize, handSize, func(card int) { + hand[idx] = int(card) + idx++ + }) +} + // ShuffleAndDealToSliceWithValidation will do validation before ShuffleAndDealToSlice -func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int32) ([]int32, error) { - if !ValidateParameters(deckSize, handSize) { - return nil, errors.New("bad parameters") +func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int) ([]int, error) { + if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 { + return nil, errors.New(strings.Join(errs, ";")) } return ShuffleAndDealToSlice(hashValue, deckSize, handSize), nil } + +// ShuffleAndDealIntoHandWithValidation does validation and then ShuffleAndDealIntoHand +func ShuffleAndDealIntoHandWithValidation(hashValue uint64, deckSize int, hand []int) error { + if errs := ValidateParameters(deckSize, len(hand)); len(errs) > 0 { + return errors.New(strings.Join(errs, ";")) + } + ShuffleAndDealIntoHand(hashValue, deckSize, hand) + return nil +} diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go index 0f6529c3f02..125c28da284 100644 --- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go @@ -20,69 +20,72 @@ import ( "math" "math/rand" "sort" + "strings" "testing" ) func TestValidateParameters(t *testing.T) { tests := []struct { - name string - deckSize int32 - handSize int32 - validated bool + name string + deckSize int + handSize int + errors []string }{ { "deckSize is < 0", -100, 8, - false, + []string{"deckSize is not positive"}, }, { "handSize is < 0", 128, -100, - false, + []string{"handSize is not positive"}, }, { "deckSize is 0", 0, 8, - false, + []string{"deckSize is not positive"}, }, { "handSize is 0", 128, 0, - false, + []string{"handSize is not positive"}, }, { "handSize is greater than deckSize", 128, 129, - false, + []string{"handSize is greater than deckSize"}, }, { "deckSize: 128 handSize: 6", 128, 6, - true, + nil, }, { "deckSize: 1024 handSize: 6", 1024, 6, - true, + nil, }, { "deckSize: 512 handSize: 8", 512, 8, - false, + []string{"more than 60 bits of entropy required"}, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - if ValidateParameters(test.deckSize, test.handSize) != test.validated { - t.Errorf("test case %s fails", test.name) + got := strings.Join(ValidateParameters(test.deckSize, test.handSize), ";") + expected := strings.Join(test.errors, ";") + if got != expected { + t.Errorf("test case %s got %q but expected %q", test.name, got, expected) return } }) @@ -90,7 +93,7 @@ func TestValidateParameters(t *testing.T) { } func BenchmarkValidateParameters(b *testing.B) { - deckSize, handSize := int32(512), int32(8) + deckSize, handSize := 512, 8 for i := 0; i < b.N; i++ { _ = ValidateParameters(deckSize, handSize) } @@ -99,65 +102,65 @@ func BenchmarkValidateParameters(b *testing.B) { func TestShuffleAndDealWithValidation(t *testing.T) { tests := []struct { name string - deckSize int32 - handSize int32 - pick func(int32) + deckSize int + handSize int + pick func(int) validated bool }{ { "deckSize is < 0", -100, 8, - func(int32) {}, + func(int) {}, false, }, { "handSize is < 0", 128, -100, - func(int32) {}, + func(int) {}, false, }, { "deckSize is 0", 0, 8, - func(int32) {}, + func(int) {}, false, }, { "handSize is 0", 128, 0, - func(int32) {}, + func(int) {}, false, }, { "handSize is greater than deckSize", 128, 129, - func(int32) {}, + func(int) {}, false, }, { "deckSize: 128 handSize: 6", 128, 6, - func(int32) {}, + func(int) {}, true, }, { "deckSize: 1024 handSize: 6", 1024, 6, - func(int32) {}, + func(int) {}, true, }, { "deckSize: 512 handSize: 8", 512, 8, - func(int32) {}, + func(int) {}, false, }, } @@ -173,8 +176,8 @@ func TestShuffleAndDealWithValidation(t *testing.T) { func BenchmarkShuffleAndDeal(b *testing.B) { hashValueBase := math.MaxUint64 / uint64(b.N) - deckSize, handSize := int32(512), int32(8) - pick := func(int32) {} + deckSize, handSize := 512, 8 + pick := func(int) {} for i := 0; i < b.N; i++ { ShuffleAndDeal(hashValueBase*uint64(i), deckSize, handSize, pick) } @@ -183,8 +186,8 @@ func BenchmarkShuffleAndDeal(b *testing.B) { func TestShuffleAndDealToSliceWithValidation(t *testing.T) { tests := []struct { name string - deckSize int32 - handSize int32 + deckSize int + handSize int validated bool }{ { @@ -253,13 +256,13 @@ func TestShuffleAndDealToSliceWithValidation(t *testing.T) { } // check cards range and duplication - cardMap := make(map[int32]struct{}) - for _, cardIdx := range cards { - if cardIdx < 0 || cardIdx >= test.deckSize { + cardMap := make(map[int]struct{}) + for _, card := range cards { + if card < 0 || card >= int(test.deckSize) { t.Errorf("test case %s fails in range check", test.name) return } - cardMap[cardIdx] = struct{}{} + cardMap[card] = struct{}{} } if len(cardMap) != int(test.handSize) { t.Errorf("test case %s fails in duplication check", test.name) @@ -272,33 +275,73 @@ func TestShuffleAndDealToSliceWithValidation(t *testing.T) { func BenchmarkShuffleAndDealToSlice(b *testing.B) { hashValueBase := math.MaxUint64 / uint64(b.N) - deckSize, handSize := int32(512), int32(8) + deckSize, handSize := 512, 8 for i := 0; i < b.N; i++ { _ = ShuffleAndDealToSlice(hashValueBase*uint64(i), deckSize, handSize) } } -func TestUniformDistribution(t *testing.T) { - deckSize, handSize := int32(128), int32(3) - handCoordinateMap := make(map[int]int) - - allCoordinateCount := 128 * 127 * 126 / 6 - - for i := 0; i < allCoordinateCount*16; i++ { - hands := ShuffleAndDealToSlice(rand.Uint64(), deckSize, handSize) - sort.Slice(hands, func(i, j int) bool { - return hands[i] < hands[j] - }) - handCoordinate := 0 - for _, hand := range hands { - handCoordinate = handCoordinate<<7 + int(hand) - } - handCoordinateMap[handCoordinate]++ +// ff computes the falling factorial `n!/(n-m)!` and requires n to be +// positive and m to be in the range [0, n] and requires the answer to +// fit in an int +func ff(n, m int) int { + ans := 1 + for f := n; f > n-m; f-- { + ans *= f } + return ans +} - // TODO: check uniform distribution - t.Logf("%d", len(handCoordinateMap)) - t.Logf("%d", allCoordinateCount) +func TestUniformDistribution(t *testing.T) { + const spare = 64 - maxHashBits + tests := []struct { + deckSize, handSize int + hashMax int + }{ + {64, 3, 1 << uint(math.Ceil(math.Log2(float64(ff(64, 3))))+spare)}, + {128, 3, ff(128, 3)}, + {128, 3, 3 * ff(128, 3)}, + {70, 4, ff(70, 4)}, + } + for _, test := range tests { + handCoordinateMap := make(map[int]int) // maps coded hand to count of times seen + fallingFactorial := ff(test.deckSize, test.handSize) + permutations := ff(test.handSize, test.handSize) + allCoordinateCount := fallingFactorial / permutations + nff := float64(test.hashMax) / float64(fallingFactorial) + minCount := permutations * int(math.Floor(nff)) + maxCount := permutations * int(math.Ceil(nff)) + aHand := make([]int, test.handSize) + for i := 0; i < test.hashMax; i++ { + ShuffleAndDealIntoHand(uint64(i), test.deckSize, aHand) + sort.IntSlice(aHand).Sort() + handCoordinate := 0 + for _, card := range aHand { + handCoordinate = handCoordinate<<7 + card + } + handCoordinateMap[handCoordinate]++ + } + + t.Logf("Deck size = %v, hand size = %v, number of possible hands = %d, number of hands seen = %d, number of deals = %d, expected count range = [%v, %v]", test.deckSize, test.handSize, allCoordinateCount, len(handCoordinateMap), test.hashMax, minCount, maxCount) + + // histogram maps (count of times a hand is seen) to (number of hands having that count) + histogram := make(map[int]int) + for _, count := range handCoordinateMap { + histogram[count] = histogram[count] + 1 + } + + var goodSum int + for count := minCount; count <= maxCount; count++ { + goodSum += histogram[count] + } + + goodPct := 100 * float64(goodSum) / float64(allCoordinateCount) + + t.Logf("good percentage = %v, histogram = %v", goodPct, histogram) + if goodSum != allCoordinateCount { + t.Errorf("Only %v percent of the hands got a central count", goodPct) + } + } return } From 7a3ca070cdd9804a22bf5db8a99576b09fc52484 Mon Sep 17 00:00:00 2001 From: Bruce Ma Date: Fri, 11 Oct 2019 20:06:31 +0800 Subject: [PATCH 3/3] address some comments Clean up useless functions, only keep the basic function Deal and the function DealIntoHand which will be used by Priority and Fairness. Improve some comments for constants and functions. Introduce Dealer to combine parameters and methods into a whole. Use fixed-size slice to improve performance. Use math.Ceil and math.Log2 to calculate required entropy bits. Make the given hand adaptive to handSize in DealIntoHand. Signed-off-by: Bruce Ma --- .../util/shufflesharding/shufflesharding.go | 166 ++++----- .../shufflesharding/shufflesharding_test.go | 325 ++++++++---------- 2 files changed, 218 insertions(+), 273 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go index 2767b2f0b87..6ef4ed89002 100644 --- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding.go @@ -17,117 +17,91 @@ limitations under the License. package shufflesharding import ( - "errors" "fmt" "math" - "strings" ) -const maxHashBits = 60 +// MaxHashBits is the max bit length which can be used from hash value. +// If we use all bits of hash value, the critical(last) card shuffled by +// Dealer will be uneven to 2:3 (first half:second half) at most, +// in order to reduce this unevenness to 32:33, we set MaxHashBits to 60 here. +const MaxHashBits = 60 -// ValidateParameters finds errors in the parameters for shuffle -// sharding. Returns a slice for which `len()` is 0 if and only if -// there are no errors. The entropy requirement is evaluated in a -// fast but approximate way: bits(deckSize^handSize). -func ValidateParameters(deckSize, handSize int) (errs []string) { - if handSize <= 0 { - errs = append(errs, "handSize is not positive") - } - if deckSize <= 0 { - errs = append(errs, "deckSize is not positive") - } - if len(errs) > 0 { - return - } - if handSize > deckSize { - return []string{"handSize is greater than deckSize"} - } - if math.Log2(float64(deckSize))*float64(handSize) > maxHashBits { - return []string{fmt.Sprintf("more than %d bits of entropy required", maxHashBits)} - } - return +// RequiredEntropyBits makes a quick and slightly conservative estimate of the number +// of bits of hash value that are consumed in shuffle sharding a deck of the given size +// to a hand of the given size. The result is meaningful only if +// 1 <= handSize <= deckSize <= 1<<26. +func RequiredEntropyBits(deckSize, handSize int) int { + return int(math.Ceil(math.Log2(float64(deckSize)) * float64(handSize))) } -// ShuffleAndDeal can shuffle a hash value to handSize-quantity and non-redundant -// indices of decks, with the pick function, we can get the optimal deck index -// Eg. From deckSize=128, handSize=8, we can get an index array [12 14 73 18 119 51 117 26], -// then pick function will choose the optimal index from these -// Algorithm: https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/20190228-priority-and-fairness.md#queue-assignment-proof-of-concept -func ShuffleAndDeal(hashValue uint64, deckSize, handSize int, pick func(int)) { - remainders := make([]int, handSize) +// Dealer contains some necessary parameters and provides some methods for shuffle sharding. +// Dealer is thread-safe. +type Dealer struct { + deckSize int + handSize int +} - for i := 0; i < handSize; i++ { - hashValueNext := hashValue / uint64(deckSize-i) - remainders[i] = int(hashValue - uint64(deckSize-i)*hashValueNext) +// NewDealer will create a Dealer with the given deckSize and handSize, will return error when +// deckSize or handSize is invalid as below. +// 1. deckSize or handSize is not positive +// 2. handSize is greater than deckSize +// 3. deckSize is impractically large (greater than 1<<26) +// 4. required entropy bits of deckSize and handSize is greater than MaxHashBits +func NewDealer(deckSize, handSize int) (*Dealer, error) { + if deckSize <= 0 || handSize <= 0 { + return nil, fmt.Errorf("deckSize %d or handSize %d is not positive", deckSize, handSize) + } + if handSize > deckSize { + return nil, fmt.Errorf("handSize %d is greater than deckSize %d", handSize, deckSize) + } + if deckSize > 1<<26 { + return nil, fmt.Errorf("deckSize %d is impractically large", deckSize) + } + if RequiredEntropyBits(deckSize, handSize) > MaxHashBits { + return nil, fmt.Errorf("required entropy bits of deckSize %d and handSize %d is greater than %d", deckSize, handSize, MaxHashBits) + } + + return &Dealer{ + deckSize: deckSize, + handSize: handSize, + }, nil +} + +// Deal shuffles a card deck and deals a hand of cards, using the given hashValue as the source of entropy. +// The deck size and hand size are properties of the Dealer. +// This function synchronously makes sequential calls to pick, one for each dealt card. +// Each card is identified by an integer in the range [0, deckSize). +// For example, for deckSize=128 and handSize=4 this function might call pick(14); pick(73); pick(119); pick(26). +func (d *Dealer) Deal(hashValue uint64, pick func(int)) { + // 15 is the largest possible value of handSize + var remainders [15]int + + for i := 0; i < d.handSize; i++ { + hashValueNext := hashValue / uint64(d.deckSize-i) + remainders[i] = int(hashValue - uint64(d.deckSize-i)*hashValueNext) hashValue = hashValueNext } - for i := 0; i < handSize; i++ { - candidate := remainders[i] + for i := 0; i < d.handSize; i++ { + card := remainders[i] for j := i; j > 0; j-- { - if candidate >= remainders[j-1] { - candidate++ + if card >= remainders[j-1] { + card++ } } - pick(candidate) + pick(card) } } -// ShuffleAndDealWithValidation will do validation before ShuffleAndDeal -func ShuffleAndDealWithValidation(hashValue uint64, deckSize, handSize int, pick func(int)) error { - if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 { - return errors.New(strings.Join(errs, ";")) - } - - ShuffleAndDeal(hashValue, deckSize, handSize, pick) - return nil -} - -// ShuffleAndDealToSlice will use specific pick function to return slices of indices -// after ShuffleAndDeal -func ShuffleAndDealToSlice(hashValue uint64, deckSize, handSize int) []int { - var ( - candidates = make([]int, handSize) - idx = 0 - ) - - pickToSlices := func(can int) { - candidates[idx] = int(can) - idx++ - } - - ShuffleAndDeal(hashValue, deckSize, handSize, pickToSlices) - - return candidates -} - -// ShuffleAndDealIntoHand shuffles a deck of the given size by the -// given hash value and deals cards into the given slice. The virtue -// of this function compared to ShuffleAndDealToSlice is that the -// caller provides the storage for the hand. -func ShuffleAndDealIntoHand(hashValue uint64, deckSize int, hand []int) { - handSize := len(hand) - var idx int - ShuffleAndDeal(hashValue, deckSize, handSize, func(card int) { - hand[idx] = int(card) - idx++ - }) -} - -// ShuffleAndDealToSliceWithValidation will do validation before ShuffleAndDealToSlice -func ShuffleAndDealToSliceWithValidation(hashValue uint64, deckSize, handSize int) ([]int, error) { - if errs := ValidateParameters(deckSize, handSize); len(errs) > 0 { - return nil, errors.New(strings.Join(errs, ";")) - } - - return ShuffleAndDealToSlice(hashValue, deckSize, handSize), nil -} - -// ShuffleAndDealIntoHandWithValidation does validation and then ShuffleAndDealIntoHand -func ShuffleAndDealIntoHandWithValidation(hashValue uint64, deckSize int, hand []int) error { - if errs := ValidateParameters(deckSize, len(hand)); len(errs) > 0 { - return errors.New(strings.Join(errs, ";")) - } - ShuffleAndDealIntoHand(hashValue, deckSize, hand) - return nil +// DealIntoHand shuffles and deals according to the Dealer's parameters, +// using the given hashValue as the source of entropy and then +// returns the dealt cards as a slice of `int`. +// If `hand` has the correct length as Dealer's handSize, it will be used as-is and no allocations will be made. +// If `hand` is nil or too small, it will be extended (performing an allocation). +// If `hand` is too large, a sub-slice will be returned. +func (d *Dealer) DealIntoHand(hashValue uint64, hand []int) []int { + h := hand[:0] + d.Deal(hashValue, func(card int) { h = append(h, card) }) + return h } diff --git a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go index 125c28da284..4e71a942a14 100644 --- a/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go +++ b/staging/src/k8s.io/apiserver/pkg/util/shufflesharding/shufflesharding_test.go @@ -17,55 +17,80 @@ limitations under the License. package shufflesharding import ( + "fmt" "math" "math/rand" + "reflect" "sort" - "strings" "testing" ) -func TestValidateParameters(t *testing.T) { +func TestRequiredEntropyBits(t *testing.T) { + tests := []struct { + name string + deckSize int + handSize int + expectedBits int + }{ + { + "deckSize: 1024 handSize: 6", + 1024, + 6, + 60, + }, + { + "deckSize: 512 handSize: 8", + 512, + 8, + 72, + }, + } + + for _, test := range tests { + bits := RequiredEntropyBits(test.deckSize, test.handSize) + if bits != test.expectedBits { + t.Errorf("test %s fails: expected %v but got %v", test.name, test.expectedBits, bits) + return + } + } +} + +func TestNewDealer(t *testing.T) { tests := []struct { name string deckSize int handSize int - errors []string + err error }{ { - "deckSize is < 0", + "deckSize <= 0", -100, 8, - []string{"deckSize is not positive"}, + fmt.Errorf("deckSize -100 or handSize 8 is not positive"), }, { - "handSize is < 0", - 128, - -100, - []string{"handSize is not positive"}, - }, - { - "deckSize is 0", + "handSize <= 0", + 100, 0, - 8, - []string{"deckSize is not positive"}, - }, - { - "handSize is 0", - 128, - 0, - []string{"handSize is not positive"}, + fmt.Errorf("deckSize 100 or handSize 0 is not positive"), }, { "handSize is greater than deckSize", - 128, - 129, - []string{"handSize is greater than deckSize"}, + 100, + 101, + fmt.Errorf("handSize 101 is greater than deckSize 100"), }, { - "deckSize: 128 handSize: 6", - 128, - 6, - nil, + "deckSize is impractically large", + 1 << 27, + 2, + fmt.Errorf("deckSize 134217728 is impractically large"), + }, + { + "required entropy bits is greater than MaxHashBits", + 512, + 8, + fmt.Errorf("required entropy bits of deckSize 512 and handSize 8 is greater than 60"), }, { "deckSize: 1024 handSize: 6", @@ -73,211 +98,92 @@ func TestValidateParameters(t *testing.T) { 6, nil, }, - { - "deckSize: 512 handSize: 8", - 512, - 8, - []string{"more than 60 bits of entropy required"}, - }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got := strings.Join(ValidateParameters(test.deckSize, test.handSize), ";") - expected := strings.Join(test.errors, ";") - if got != expected { - t.Errorf("test case %s got %q but expected %q", test.name, got, expected) + _, err := NewDealer(test.deckSize, test.handSize) + if !reflect.DeepEqual(err, test.err) { + t.Errorf("test %s fails: expected %v but got %v", test.name, test.err, err) return } }) } } -func BenchmarkValidateParameters(b *testing.B) { - deckSize, handSize := 512, 8 - for i := 0; i < b.N; i++ { - _ = ValidateParameters(deckSize, handSize) - } -} - -func TestShuffleAndDealWithValidation(t *testing.T) { +func TestCardDuplication(t *testing.T) { tests := []struct { - name string - deckSize int - handSize int - pick func(int) - validated bool + name string + deckSize int + handSize int }{ - { - "deckSize is < 0", - -100, - 8, - func(int) {}, - false, - }, - { - "handSize is < 0", - 128, - -100, - func(int) {}, - false, - }, - { - "deckSize is 0", - 0, - 8, - func(int) {}, - false, - }, - { - "handSize is 0", - 128, - 0, - func(int) {}, - false, - }, - { - "handSize is greater than deckSize", - 128, - 129, - func(int) {}, - false, - }, - { - "deckSize: 128 handSize: 6", - 128, - 6, - func(int) {}, - true, - }, - { - "deckSize: 1024 handSize: 6", - 1024, - 6, - func(int) {}, - true, - }, - { - "deckSize: 512 handSize: 8", - 512, - 8, - func(int) {}, - false, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if (ShuffleAndDealWithValidation(rand.Uint64(), test.deckSize, test.handSize, test.pick) == nil) != test.validated { - t.Errorf("test case %s fails", test.name) - return - } - }) - } -} - -func BenchmarkShuffleAndDeal(b *testing.B) { - hashValueBase := math.MaxUint64 / uint64(b.N) - deckSize, handSize := 512, 8 - pick := func(int) {} - for i := 0; i < b.N; i++ { - ShuffleAndDeal(hashValueBase*uint64(i), deckSize, handSize, pick) - } -} - -func TestShuffleAndDealToSliceWithValidation(t *testing.T) { - tests := []struct { - name string - deckSize int - handSize int - validated bool - }{ - { - "validation fails", - -100, - -100, - false, - }, { "deckSize = handSize = 4", 4, 4, - true, }, { "deckSize = handSize = 8", 8, 8, - true, }, { "deckSize = handSize = 10", 10, 10, - true, }, { "deckSize = handSize = 12", 12, 12, - true, }, { "deckSize = 128, handSize = 8", 128, 8, - true, }, { "deckSize = 256, handSize = 7", 256, 7, - true, }, { "deckSize = 512, handSize = 6", 512, 6, - true, }, } for _, test := range tests { hashValue := rand.Uint64() t.Run(test.name, func(t *testing.T) { - cards, err := ShuffleAndDealToSliceWithValidation(hashValue, test.deckSize, test.handSize) - if (err == nil) != test.validated { - t.Errorf("test case %s fails in validation check", test.name) + dealer, err := NewDealer(test.deckSize, test.handSize) + if err != nil { + t.Errorf("fail to create Dealer: %v", err) + return + } + hand := make([]int, 0) + hand = dealer.DealIntoHand(hashValue, hand) + + // check cards number + if len(hand) != int(test.handSize) { + t.Errorf("test case %s fails in cards number", test.name) return } - if test.validated { - // check cards number - if len(cards) != int(test.handSize) { - t.Errorf("test case %s fails in cards number", test.name) - return - } - - // check cards range and duplication - cardMap := make(map[int]struct{}) - for _, card := range cards { - if card < 0 || card >= int(test.deckSize) { - t.Errorf("test case %s fails in range check", test.name) - return - } - cardMap[card] = struct{}{} - } - if len(cardMap) != int(test.handSize) { - t.Errorf("test case %s fails in duplication check", test.name) + // check cards range and duplication + cardMap := make(map[int]struct{}) + for _, card := range hand { + if card < 0 || card >= int(test.deckSize) { + t.Errorf("test case %s fails in range check", test.name) return } + cardMap[card] = struct{}{} + } + if len(cardMap) != int(test.handSize) { + t.Errorf("test case %s fails in duplication check", test.name) + return } - }) - } -} -func BenchmarkShuffleAndDealToSlice(b *testing.B) { - hashValueBase := math.MaxUint64 / uint64(b.N) - deckSize, handSize := 512, 8 - for i := 0; i < b.N; i++ { - _ = ShuffleAndDealToSlice(hashValueBase*uint64(i), deckSize, handSize) + }) } } @@ -293,7 +199,7 @@ func ff(n, m int) int { } func TestUniformDistribution(t *testing.T) { - const spare = 64 - maxHashBits + const spare = 64 - MaxHashBits tests := []struct { deckSize, handSize int hashMax int @@ -304,6 +210,11 @@ func TestUniformDistribution(t *testing.T) { {70, 4, ff(70, 4)}, } for _, test := range tests { + dealer, err := NewDealer(test.deckSize, test.handSize) + if err != nil { + t.Errorf("fail to create Dealer: %v", err) + return + } handCoordinateMap := make(map[int]int) // maps coded hand to count of times seen fallingFactorial := ff(test.deckSize, test.handSize) @@ -314,7 +225,7 @@ func TestUniformDistribution(t *testing.T) { maxCount := permutations * int(math.Ceil(nff)) aHand := make([]int, test.handSize) for i := 0; i < test.hashMax; i++ { - ShuffleAndDealIntoHand(uint64(i), test.deckSize, aHand) + aHand = dealer.DealIntoHand(uint64(i), aHand) sort.IntSlice(aHand).Sort() handCoordinate := 0 for _, card := range aHand { @@ -345,3 +256,63 @@ func TestUniformDistribution(t *testing.T) { } return } + +func TestDealer_DealIntoHand(t *testing.T) { + dealer, _ := NewDealer(6, 6) + + tests := []struct { + name string + hand []int + expectedSize int + }{ + { + "nil slice", + nil, + 6, + }, + { + "empty slice", + make([]int, 0), + 6, + }, + { + "size: 6 cap: 6 slice", + make([]int, 6, 6), + 6, + }, + { + "size: 6 cap: 12 slice", + make([]int, 6, 12), + 6, + }, + { + "size: 4 cap: 4 slice", + make([]int, 4, 4), + 6, + }, + { + "size: 4 cap: 12 slice", + make([]int, 4, 12), + 6, + }, + { + "size: 10 cap: 10 slice", + make([]int, 10, 10), + 6, + }, + { + "size: 10 cap: 12 slice", + make([]int, 10, 12), + 6, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + h := dealer.DealIntoHand(0, test.hand) + if len(h) != test.expectedSize { + t.Errorf("test %s fails: expetced size %d but got %d", test.name, test.expectedSize, len(h)) + return + } + }) + } +}