From 5ea427ebb508ce2871d4d0f9869959275c0d3bce Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Sat, 17 Aug 2024 11:01:41 +0200 Subject: [PATCH] Use btree for watch cache storage to serve LIST more efficiently Can be disabled via BtreeWatchCache feature flag. --- pkg/features/versioned_kube_features.go | 4 ++ .../apiserver/pkg/features/kube_features.go | 9 ++++ .../apiserver/pkg/storage/cacher/store.go | 42 +++++++++++++++++++ .../pkg/storage/cacher/store_btree_test.go | 4 +- .../pkg/storage/cacher/store_test.go | 4 +- .../pkg/storage/cacher/watch_cache.go | 39 ++++++++--------- .../test_data/versioned_feature_list.yaml | 6 +++ 7 files changed, 83 insertions(+), 25 deletions(-) diff --git a/pkg/features/versioned_kube_features.go b/pkg/features/versioned_kube_features.go index 0ed4bef24fc..05c38cb34f1 100644 --- a/pkg/features/versioned_kube_features.go +++ b/pkg/features/versioned_kube_features.go @@ -241,6 +241,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate {Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta}, }, + genericfeatures.BtreeWatchCache: { + {Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta}, + }, + genericfeatures.ConcurrentWatchObjectDecode: { {Version: version.MustParse("1.31"), Default: false, PreRelease: featuregate.Beta}, }, diff --git a/staging/src/k8s.io/apiserver/pkg/features/kube_features.go b/staging/src/k8s.io/apiserver/pkg/features/kube_features.go index 1c5d1cc6d49..6ccbaca5a8e 100644 --- a/staging/src/k8s.io/apiserver/pkg/features/kube_features.go +++ b/staging/src/k8s.io/apiserver/pkg/features/kube_features.go @@ -96,6 +96,11 @@ const ( // This feature is currently PRE-ALPHA and MUST NOT be enabled outside of integration tests. TestOnlyCBORServingAndStorage featuregate.Feature = "TestOnlyCBORServingAndStorage" + // owner: @serathius + // + // Replaces watch cache hashmap implementation with a btree based one, bringing performance improvements. + BtreeWatchCache featuregate.Feature = "BtreeWatchCache" + // owner: @serathius // Enables concurrent watch object decoding to avoid starving watch cache when conversion webhook is installed. ConcurrentWatchObjectDecode featuregate.Feature = "ConcurrentWatchObjectDecode" @@ -299,6 +304,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate {Version: version.MustParse("1.30"), Default: false, PreRelease: featuregate.Alpha}, }, + BtreeWatchCache: { + {Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta}, + }, + AuthorizeWithSelectors: { {Version: version.MustParse("1.31"), Default: false, PreRelease: featuregate.Alpha}, {Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta}, diff --git a/staging/src/k8s.io/apiserver/pkg/storage/cacher/store.go b/staging/src/k8s.io/apiserver/pkg/storage/cacher/store.go index c0007b73102..8edad10a27f 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/cacher/store.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/cacher/store.go @@ -24,9 +24,44 @@ import ( "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apiserver/pkg/features" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/cache" ) +const ( + // btreeDegree defines the degree of btree storage. + // Decided based on the benchmark results (below). + // Selected the lowest degree from three options with best runtime (16,32,128). + // │ 2 │ 4 │ 8 │ 16 │ 32 │ 64 │ 128 │ + // │ sec/op │ sec/op vs base │ sec/op vs base │ sec/op vs base │ sec/op vs base │ sec/op vs base │ sec/op vs base │ + // StoreCreateList/RV=NotOlderThan-24 473.0µ ± 11% 430.1µ ± 9% -9.08% (p=0.005 n=10) 427.9µ ± 6% -9.54% (p=0.002 n=10) 403.9µ ± 8% -14.62% (p=0.000 n=10) 401.0µ ± 4% -15.22% (p=0.000 n=10) 408.0µ ± 4% -13.75% (p=0.000 n=10) 385.9µ ± 4% -18.42% (p=0.000 n=10) + // StoreCreateList/RV=ExactMatch-24 604.7µ ± 4% 596.7µ ± 8% ~ (p=0.529 n=10) 604.6µ ± 4% ~ (p=0.971 n=10) 601.1µ ± 4% ~ (p=0.853 n=10) 611.0µ ± 6% ~ (p=0.105 n=10) 598.2µ ± 5% ~ (p=0.579 n=10) 608.2µ ± 3% ~ (p=0.796 n=10) + // StoreList/List=All/Paginate=False/RV=Empty-24 729.1µ ± 5% 692.9µ ± 3% -4.96% (p=0.002 n=10) 693.7µ ± 3% -4.86% (p=0.000 n=10) 688.3µ ± 1% -5.59% (p=0.000 n=10) 690.4µ ± 5% -5.31% (p=0.002 n=10) 689.7µ ± 2% -5.40% (p=0.000 n=10) 687.8µ ± 3% -5.67% (p=0.000 n=10) + // StoreList/List=All/Paginate=True/RV=Empty-24 19.51m ± 2% 19.84m ± 2% ~ (p=0.105 n=10) 19.89m ± 3% ~ (p=0.190 n=10) 19.64m ± 4% ~ (p=0.853 n=10) 19.34m ± 4% ~ (p=0.481 n=10) 20.22m ± 4% +3.66% (p=0.007 n=10) 19.58m ± 4% ~ (p=0.912 n=10) + // StoreList/List=Namespace/Paginate=False/RV=Empty-24 1.672m ± 4% 1.635m ± 2% ~ (p=0.247 n=10) 1.673m ± 5% ~ (p=0.631 n=10) 1.657m ± 2% ~ (p=0.971 n=10) 1.656m ± 4% ~ (p=0.739 n=10) 1.678m ± 2% ~ (p=0.631 n=10) 1.718m ± 8% ~ (p=0.105 n=10) + // geomean 1.467m 1.420m -3.24% 1.430m -2.58% 1.403m -4.38% 1.402m -4.46% 1.417m -3.44% 1.403m -4.41% + // + // │ 2 │ 4 │ 8 │ 16 │ 32 │ 64 │ 128 │ + // │ B/op │ B/op vs base │ B/op vs base │ B/op vs base │ B/op vs base │ B/op vs base │ B/op vs base │ + // StoreCreateList/RV=NotOlderThan-24 98.58Ki ± 11% 101.33Ki ± 13% ~ (p=0.280 n=10) 99.80Ki ± 26% ~ (p=0.353 n=10) 109.63Ki ± 9% ~ (p=0.075 n=10) 112.56Ki ± 6% +14.18% (p=0.007 n=10) 114.41Ki ± 10% +16.05% (p=0.003 n=10) 115.06Ki ± 12% +16.72% (p=0.011 n=10) + // StoreCreateList/RV=ExactMatch-24 117.1Ki ± 0% 117.5Ki ± 0% ~ (p=0.218 n=10) 116.9Ki ± 0% ~ (p=0.052 n=10) 117.3Ki ± 0% ~ (p=0.353 n=10) 116.9Ki ± 0% ~ (p=0.075 n=10) 117.0Ki ± 0% ~ (p=0.436 n=10) 117.0Ki ± 0% ~ (p=0.280 n=10) + // StoreList/List=All/Paginate=False/RV=Empty-24 6.023Mi ± 0% 6.024Mi ± 0% +0.01% (p=0.037 n=10) 6.024Mi ± 0% ~ (p=0.493 n=10) 6.024Mi ± 0% +0.01% (p=0.035 n=10) 6.024Mi ± 0% ~ (p=0.247 n=10) 6.024Mi ± 0% ~ (p=0.247 n=10) 6.024Mi ± 0% ~ (p=0.315 n=10) + // StoreList/List=All/Paginate=True/RV=Empty-24 64.22Mi ± 0% 64.21Mi ± 0% ~ (p=0.075 n=10) 64.23Mi ± 0% ~ (p=0.280 n=10) 64.21Mi ± 0% -0.02% (p=0.002 n=10) 64.22Mi ± 0% ~ (p=0.579 n=10) 64.22Mi ± 0% ~ (p=0.971 n=10) 64.22Mi ± 0% ~ (p=1.000 n=10) + // StoreList/List=Namespace/Paginate=False/RV=Empty-24 8.177Mi ± 0% 8.178Mi ± 0% ~ (p=0.579 n=10) 8.177Mi ± 0% ~ (p=0.971 n=10) 8.179Mi ± 0% ~ (p=0.579 n=10) 8.178Mi ± 0% ~ (p=0.739 n=10) 8.179Mi ± 0% ~ (p=0.315 n=10) 8.176Mi ± 0% ~ (p=0.247 n=10) + // geomean 2.034Mi 2.047Mi +0.61% 2.039Mi +0.22% 2.079Mi +2.19% 2.088Mi +2.66% 2.095Mi +3.01% 2.098Mi +3.12% + // + // │ 2 │ 4 │ 8 │ 16 │ 32 │ 64 │ 128 │ + // │ allocs/op │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ + // StoreCreateList/RV=NotOlderThan-24 560.0 ± 0% 558.0 ± 0% -0.36% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) 558.0 ± 0% -0.36% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) + // StoreCreateList/RV=ExactMatch-24 871.0 ± 0% 870.0 ± 0% -0.11% (p=0.038 n=10) 870.0 ± 0% -0.11% (p=0.004 n=10) 870.0 ± 0% -0.11% (p=0.005 n=10) 869.0 ± 0% -0.23% (p=0.000 n=10) 870.0 ± 0% -0.11% (p=0.001 n=10) 870.0 ± 0% -0.11% (p=0.000 n=10) + // StoreList/List=All/Paginate=False/RV=Empty-24 351.0 ± 3% 358.0 ± 1% +1.99% (p=0.034 n=10) 352.5 ± 3% ~ (p=0.589 n=10) 358.5 ± 1% +2.14% (p=0.022 n=10) 356.5 ± 3% ~ (p=0.208 n=10) 355.0 ± 3% ~ (p=0.224 n=10) 355.0 ± 3% ~ (p=0.183 n=10) + // StoreList/List=All/Paginate=True/RV=Empty-24 494.4k ± 0% 494.4k ± 0% ~ (p=0.424 n=10) 494.6k ± 0% +0.06% (p=0.000 n=10) 492.7k ± 0% -0.34% (p=0.000 n=10) 494.5k ± 0% +0.02% (p=0.009 n=10) 493.0k ± 0% -0.28% (p=0.000 n=10) 494.4k ± 0% ~ (p=0.424 n=10) + // StoreList/List=Namespace/Paginate=False/RV=Empty-24 32.43k ± 0% 32.44k ± 0% ~ (p=0.579 n=10) 32.43k ± 0% ~ (p=0.971 n=10) 32.45k ± 0% ~ (p=0.517 n=10) 32.44k ± 0% ~ (p=0.670 n=10) 32.46k ± 0% ~ (p=0.256 n=10) 32.41k ± 0% ~ (p=0.247 n=10) + // geomean 4.872k 4.887k +0.31% 4.870k -0.03% 4.885k +0.28% 4.880k +0.17% 4.875k +0.06% 4.876k +0.08% + btreeDegree = 16 +) + type storeIndexer interface { Add(obj interface{}) error Update(obj interface{}) error @@ -39,7 +74,14 @@ type storeIndexer interface { ByIndex(indexName, indexedValue string) ([]interface{}, error) } +type orderedLister interface { + ListPrefix(prefix, continueKey string, limit int) (items []interface{}, hasMore bool) +} + func newStoreIndexer(indexers *cache.Indexers) storeIndexer { + if utilfeature.DefaultFeatureGate.Enabled(features.BtreeWatchCache) { + return newThreadedBtreeStoreIndexer(storeElementIndexers(indexers), btreeDegree) + } return cache.NewIndexer(storeElementKey, storeElementIndexers(indexers)) } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_btree_test.go b/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_btree_test.go index 1b5143a4462..b0e05d628ec 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_btree_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_btree_test.go @@ -23,7 +23,7 @@ import ( ) func TestStoreListOrdered(t *testing.T) { - store := newThreadedBtreeStoreIndexer(nil, 32) + store := newThreadedBtreeStoreIndexer(nil, btreeDegree) assert.NoError(t, store.Add(testStorageElement("foo3", "bar3", 1))) assert.NoError(t, store.Add(testStorageElement("foo1", "bar2", 2))) assert.NoError(t, store.Add(testStorageElement("foo2", "bar1", 3))) @@ -35,7 +35,7 @@ func TestStoreListOrdered(t *testing.T) { } func TestStoreListPrefix(t *testing.T) { - store := newThreadedBtreeStoreIndexer(nil, 32) + store := newThreadedBtreeStoreIndexer(nil, btreeDegree) assert.NoError(t, store.Add(testStorageElement("foo3", "bar3", 1))) assert.NoError(t, store.Add(testStorageElement("foo1", "bar2", 2))) assert.NoError(t, store.Add(testStorageElement("foo2", "bar1", 3))) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_test.go b/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_test.go index 4703066c19f..7018bc334e6 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_test.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/cacher/store_test.go @@ -33,7 +33,7 @@ func TestStoreSingleKey(t *testing.T) { testStoreSingleKey(t, store) }) t.Run("btree", func(t *testing.T) { - store := newThreadedBtreeStoreIndexer(storeElementIndexers(testStoreIndexers()), 32) + store := newThreadedBtreeStoreIndexer(storeElementIndexers(testStoreIndexers()), btreeDegree) testStoreSingleKey(t, store) }) } @@ -65,7 +65,7 @@ func TestStoreIndexerSingleKey(t *testing.T) { testStoreIndexerSingleKey(t, store) }) t.Run("btree", func(t *testing.T) { - store := newThreadedBtreeStoreIndexer(storeElementIndexers(testStoreIndexers()), 32) + store := newThreadedBtreeStoreIndexer(storeElementIndexers(testStoreIndexers()), btreeDegree) testStoreIndexerSingleKey(t, store) }) } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go b/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go index 63599f9aab7..541988b31fd 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go @@ -452,20 +452,7 @@ func (s sortableStoreElements) Swap(i, j int) { // WaitUntilFreshAndList returns list of pointers to `storeElement` objects along // with their ResourceVersion and the name of the index, if any, that was used. -func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, key string, matchValues []storage.MatchValue) ([]interface{}, uint64, string, error) { - items, rv, index, err := w.waitUntilFreshAndListItems(ctx, resourceVersion, key, matchValues) - if err != nil { - return nil, 0, "", err - } - result, err := filterPrefix(key, items) - if err != nil { - return nil, 0, "", err - } - sort.Sort(sortableStoreElements(result)) - return result, rv, index, nil -} - -func (w *watchCache) waitUntilFreshAndListItems(ctx context.Context, resourceVersion uint64, key string, matchValues []storage.MatchValue) (result []interface{}, rv uint64, index string, err error) { +func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, key string, matchValues []storage.MatchValue) (result []interface{}, rv uint64, index string, err error) { requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress) if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && requestWatchProgressSupported && w.notFresh(resourceVersion) { w.waitingUntilFresh.Add() @@ -479,24 +466,33 @@ func (w *watchCache) waitUntilFreshAndListItems(ctx context.Context, resourceVer if err != nil { return result, rv, index, err } - - result, rv, index, err = func() ([]interface{}, uint64, string, error) { + var prefixFilteredAndOrdered bool + result, rv, index, prefixFilteredAndOrdered, err = func() ([]interface{}, uint64, string, bool, error) { // This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only // requirement here is to NOT miss anything that should be returned. We can return as many non-matching items as we // want - they will be filtered out later. The fact that we return less things is only further performance improvement. // TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible. for _, matchValue := range matchValues { if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil { - return result, w.resourceVersion, matchValue.IndexName, nil + return result, w.resourceVersion, matchValue.IndexName, false, nil } } - return w.store.List(), w.resourceVersion, "", nil + if store, ok := w.store.(orderedLister); ok { + result, _ := store.ListPrefix(key, "", 0) + return result, w.resourceVersion, "", true, nil + } + return w.store.List(), w.resourceVersion, "", false, nil }() - - return result, rv, index, err + if !prefixFilteredAndOrdered { + result, err = filterPrefixAndOrder(key, result) + if err != nil { + return nil, 0, "", err + } + } + return result, w.resourceVersion, index, nil } -func filterPrefix(prefix string, items []interface{}) ([]interface{}, error) { +func filterPrefixAndOrder(prefix string, items []interface{}) ([]interface{}, error) { var result []interface{} for _, item := range items { elem, ok := item.(*storeElement) @@ -508,6 +504,7 @@ func filterPrefix(prefix string, items []interface{}) ([]interface{}, error) { } result = append(result, item) } + sort.Sort(sortableStoreElements(result)) return result, nil } diff --git a/test/featuregates_linter/test_data/versioned_feature_list.yaml b/test/featuregates_linter/test_data/versioned_feature_list.yaml index ca7fdf1590c..33dbf048f6a 100644 --- a/test/featuregates_linter/test_data/versioned_feature_list.yaml +++ b/test/featuregates_linter/test_data/versioned_feature_list.yaml @@ -164,6 +164,12 @@ lockToDefault: false preRelease: Beta version: "1.32" +- name: BtreeWatchCache + versionedSpecs: + - default: true + lockToDefault: false + preRelease: Beta + version: "1.32" - name: CloudControllerManagerWebhook versionedSpecs: - default: false