Don't pool large CBOR encode buffers.

Objects in a sync.Pool are assumed to be fungible. This is not a good assumption for pools
of *bytes.Buffer because a *bytes.Buffer's underlying array grows as needed to accommodate writes. In
Kubernetes, apiservers tend to encode "small" objects very frequently and much larger
objects (especially large lists) only occasionally. Under steady load, pooled buffers tend to be
borrowed frequently enough to prevent them from being released. Over time, each buffer is used to
encode a large object and its capacity increases accordingly. The result is that practically all
buffers in the pool retain much more capacity than needed to encode most objects.

As a basic mitigation for the worst case, buffers with more capacity than the default max request
body size are never returned to the pool.
This commit is contained in:
Ben Luddy 2024-06-14 15:51:52 -04:00
parent bffc02b955
commit a19d142f0d
No known key found for this signature in database
GPG Key ID: A6551E73A5974C30
7 changed files with 272 additions and 77 deletions

View File

@ -99,16 +99,35 @@ func (s *serializer) Identifier() runtime.Identifier {
return "cbor"
}
// Encode writes a CBOR representation of the given object.
//
// Because the CBOR data item written by a call to Encode is always enclosed in the "self-described
// CBOR" tag, its encoded form always has the prefix 0xd9d9f7. This prefix is suitable for use as a
// "magic number" for distinguishing encoded CBOR from other protocols.
//
// The default serialization behavior for any given object replicates the behavior of the JSON
// serializer as far as it is necessary to allow the CBOR serializer to be used as a drop-in
// replacement for the JSON serializer, with limited exceptions. For example, the distinction
// between integers and floating-point numbers is preserved in CBOR due to its distinct
// representations for each type.
//
// Objects implementing runtime.Unstructured will have their unstructured content encoded rather
// than following the default behavior for their dynamic type.
//
// Encode delegates to s.encode using the modes.Encode encoding mode and returns whatever error
// that call reports, unchanged.
func (s *serializer) Encode(obj runtime.Object, w io.Writer) error {
return s.encode(modes.Encode, obj, w)
}
func (s *serializer) encode(mode modes.EncMode, obj runtime.Object, w io.Writer) error {
if _, err := w.Write(selfDescribedCBOR); err != nil {
return err
}
e := modes.Encode.NewEncoder(w)
var v interface{} = obj
if u, ok := obj.(runtime.Unstructured); ok {
return e.Encode(u.UnstructuredContent())
v = u.UnstructuredContent()
}
return e.Encode(obj)
return mode.MarshalTo(v, w)
}
// gvkWithDefaults returns group kind and version defaulting from provided default

View File

@ -0,0 +1,65 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package modes
import (
"bytes"
"sync"
)
// maxPooledBufferCap is the largest capacity, in bytes, that a buffer may have and still be
// returned to the pool. It matches the default MaxRequestBodyBytes.
const maxPooledBufferCap = 3 * 1024 * 1024

// buffers is the package-wide source of reusable encode buffers, backed by a sync.Pool.
var buffers = BufferProvider{p: new(sync.Pool)}

// buffer is a bytes.Buffer under a distinct type, so that buffers handed out by a BufferProvider
// can be told apart from arbitrary *bytes.Buffer values with a type assertion.
type buffer struct {
	bytes.Buffer
}

// pool is the subset of sync.Pool behavior that BufferProvider relies on. Declaring it as an
// interface lets tests substitute their own implementation.
type pool interface {
	Get() interface{}
	Put(interface{})
}

// BufferProvider hands out reusable buffers drawn from a pool and decides which buffers are worth
// keeping once callers are done with them.
type BufferProvider struct {
	p pool
}

// Get returns a buffer taken from the pool. If the pool yields anything other than a *buffer
// (including nothing at all), a newly allocated empty buffer is returned instead.
func (b *BufferProvider) Get() *buffer {
	pooled, ok := b.p.Get().(*buffer)
	if !ok {
		return &buffer{}
	}
	return pooled
}

// Put resets buf and returns it to the pool, unless its capacity exceeds maxPooledBufferCap, in
// which case buf is left untouched and dropped.
//
// Objects in a sync.Pool are assumed to be fungible. This is not a good assumption for pools of
// *bytes.Buffer because a *bytes.Buffer's underlying array grows as needed to accommodate writes.
// In Kubernetes, apiservers tend to encode "small" objects very frequently and much larger objects
// (especially large lists) only occasionally. Under steady load, pooled buffers tend to be
// borrowed frequently enough to prevent them from being released. Over time, each buffer is used
// to encode a large object and its capacity increases accordingly. The result is that practically
// all buffers in the pool retain much more capacity than needed to encode most objects.
//
// As a basic mitigation for the worst case, buffers with more capacity than the default max
// request body size are never returned to the pool.
//
// TODO: Optimize for higher buffer utilization.
func (b *BufferProvider) Put(buf *buffer) {
	if buf.Cap() <= maxPooledBufferCap {
		buf.Reset()
		b.p.Put(buf)
	}
}

View File

@ -0,0 +1,61 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package modes
import (
"testing"
)
// mockPool is a test double that can stand in for a BufferProvider's pool: it never supplies a
// pooled value and remembers the most recent value handed to Put.
type mockPool struct {
	v interface{}
}

// Get always behaves like an empty pool.
func (m *mockPool) Get() interface{} {
	return nil
}

// Put records item as the last value returned to the pool.
func (m *mockPool) Put(item interface{}) {
	m.v = item
}
// TestBufferProviderPut checks the retention policy of BufferProvider.Put: a buffer grown to the
// 3MiB limit is expected to be reset and handed to the pool, while a buffer grown past the limit
// must never reach the pool.
func TestBufferProviderPut(t *testing.T) {
	const limit = 3 * 1024 * 1024

	// newProvider wires a BufferProvider to a fresh recording pool and returns both.
	newProvider := func() (*mockPool, *BufferProvider) {
		recorder := new(mockPool)
		return recorder, &BufferProvider{p: recorder}
	}

	// A buffer grown to the limit, holding some data, should be pooled and emptied.
	recorder, provider := newProvider()
	small := new(buffer)
	small.Grow(limit)
	small.WriteString("hello world")
	provider.Put(small)
	if recorder.v != small {
		t.Errorf("expected buf with capacity %d to be returned to pool", small.Cap())
	}
	if small.Len() != 0 {
		t.Errorf("expected buf to be reset before returning to pool")
	}

	// A buffer grown one byte past the limit should be dropped.
	recorder, provider = newProvider()
	big := new(buffer)
	big.Grow(limit + 1)
	provider.Put(big)
	if recorder.v != nil {
		t.Errorf("expected buf with capacity %d not to be returned to pool", big.Cap())
	}
}

View File

@ -17,93 +17,139 @@ limitations under the License.
package modes
import (
"io"
"github.com/fxamacker/cbor/v2"
)
var Encode cbor.EncMode = func() cbor.EncMode {
encode, err := cbor.EncOptions{
// Map keys need to be sorted to have deterministic output, and this is the order
// defined in RFC 8949 4.2.1 "Core Deterministic Encoding Requirements".
Sort: cbor.SortBytewiseLexical,
var Encode = EncMode{
delegate: func() cbor.UserBufferEncMode {
encode, err := cbor.EncOptions{
// Map keys need to be sorted to have deterministic output, and this is the order
// defined in RFC 8949 4.2.1 "Core Deterministic Encoding Requirements".
Sort: cbor.SortBytewiseLexical,
// CBOR supports distinct types for IEEE-754 float16, float32, and float64. Store
// floats in the smallest width that preserves value so that equivalent float32 and
// float64 values encode to identical bytes, as they do in a JSON
// encoding. Satisfies one of the "Core Deterministic Encoding Requirements".
ShortestFloat: cbor.ShortestFloat16,
// CBOR supports distinct types for IEEE-754 float16, float32, and float64. Store
// floats in the smallest width that preserves value so that equivalent float32 and
// float64 values encode to identical bytes, as they do in a JSON
// encoding. Satisfies one of the "Core Deterministic Encoding Requirements".
ShortestFloat: cbor.ShortestFloat16,
// Error on attempt to encode NaN and infinite values. This is what the JSON
// serializer does.
NaNConvert: cbor.NaNConvertReject,
InfConvert: cbor.InfConvertReject,
// Error on attempt to encode NaN and infinite values. This is what the JSON
// serializer does.
NaNConvert: cbor.NaNConvertReject,
InfConvert: cbor.InfConvertReject,
// Error on attempt to encode math/big.Int values, which can't be faithfully
// roundtripped through Unstructured in general (the dynamic numeric types allowed
// in Unstructured are limited to float64 and int64).
BigIntConvert: cbor.BigIntConvertReject,
// Error on attempt to encode math/big.Int values, which can't be faithfully
// roundtripped through Unstructured in general (the dynamic numeric types allowed
// in Unstructured are limited to float64 and int64).
BigIntConvert: cbor.BigIntConvertReject,
// MarshalJSON for time.Time writes RFC3339 with nanos.
Time: cbor.TimeRFC3339Nano,
// MarshalJSON for time.Time writes RFC3339 with nanos.
Time: cbor.TimeRFC3339Nano,
// The decoder must be able to accept RFC3339 strings with or without tag 0 (e.g. by
// the end of time.Time -> JSON -> Unstructured -> CBOR, the CBOR encoder has no
// reliable way of knowing that a particular string originated from serializing a
// time.Time), so producing tag 0 has little use.
TimeTag: cbor.EncTagNone,
// The decoder must be able to accept RFC3339 strings with or without tag 0 (e.g. by
// the end of time.Time -> JSON -> Unstructured -> CBOR, the CBOR encoder has no
// reliable way of knowing that a particular string originated from serializing a
// time.Time), so producing tag 0 has little use.
TimeTag: cbor.EncTagNone,
// Indefinite-length items have multiple encodings and aren't being used anyway, so
// disable to avoid an opportunity for nondeterminism.
IndefLength: cbor.IndefLengthForbidden,
// Indefinite-length items have multiple encodings and aren't being used anyway, so
// disable to avoid an opportunity for nondeterminism.
IndefLength: cbor.IndefLengthForbidden,
// Preserve distinction between nil and empty for slices and maps.
NilContainers: cbor.NilContainerAsNull,
// Preserve distinction between nil and empty for slices and maps.
NilContainers: cbor.NilContainerAsNull,
// OK to produce tags.
TagsMd: cbor.TagsAllowed,
// OK to produce tags.
TagsMd: cbor.TagsAllowed,
// Use the same definition of "empty" as encoding/json.
OmitEmpty: cbor.OmitEmptyGoValue,
// Use the same definition of "empty" as encoding/json.
OmitEmpty: cbor.OmitEmptyGoValue,
// The CBOR types text string and byte string are structurally equivalent, with the
// semantic difference that a text string whose content is an invalid UTF-8 sequence
// is itself invalid. We reject all invalid text strings at decode time and do not
// validate or sanitize all Go strings at encode time. Encoding Go strings to the
// byte string type is comparable to the existing Protobuf behavior and cheaply
// ensures that the output is valid CBOR.
String: cbor.StringToByteString,
// The CBOR types text string and byte string are structurally equivalent, with the
// semantic difference that a text string whose content is an invalid UTF-8 sequence
// is itself invalid. We reject all invalid text strings at decode time and do not
// validate or sanitize all Go strings at encode time. Encoding Go strings to the
// byte string type is comparable to the existing Protobuf behavior and cheaply
// ensures that the output is valid CBOR.
String: cbor.StringToByteString,
// Encode struct field names to the byte string type rather than the text string
// type.
FieldName: cbor.FieldNameToByteString,
// Encode struct field names to the byte string type rather than the text string
// type.
FieldName: cbor.FieldNameToByteString,
// Marshal Go byte arrays to CBOR arrays of integers (as in JSON) instead of byte
// strings.
ByteArray: cbor.ByteArrayToArray,
// Marshal Go byte arrays to CBOR arrays of integers (as in JSON) instead of byte
// strings.
ByteArray: cbor.ByteArrayToArray,
// Marshal []byte to CBOR byte string enclosed in tag 22 (expected later base64
// encoding, https://www.rfc-editor.org/rfc/rfc8949.html#section-3.4.5.2), to
// interoperate with the existing JSON behavior. This indicates to the decoder that,
// when decoding into a string (or unstructured), the resulting value should be the
// base64 encoding of the original bytes. No base64 encoding or decoding needs to be
// performed for []byte-to-CBOR-to-[]byte roundtrips.
ByteSliceLaterFormat: cbor.ByteSliceLaterFormatBase64,
// Marshal []byte to CBOR byte string enclosed in tag 22 (expected later base64
// encoding, https://www.rfc-editor.org/rfc/rfc8949.html#section-3.4.5.2), to
// interoperate with the existing JSON behavior. This indicates to the decoder that,
// when decoding into a string (or unstructured), the resulting value should be the
// base64 encoding of the original bytes. No base64 encoding or decoding needs to be
// performed for []byte-to-CBOR-to-[]byte roundtrips.
ByteSliceLaterFormat: cbor.ByteSliceLaterFormatBase64,
// Disable default recognition of types implementing encoding.BinaryMarshaler, which
// is not recognized for JSON encoding.
BinaryMarshaler: cbor.BinaryMarshalerNone,
}.EncMode()
if err != nil {
panic(err)
// Disable default recognition of types implementing encoding.BinaryMarshaler, which
// is not recognized for JSON encoding.
BinaryMarshaler: cbor.BinaryMarshalerNone,
}.UserBufferEncMode()
if err != nil {
panic(err)
}
return encode
}(),
}
// EncodeNondeterministic is derived from Encode: it starts from Encode's options and turns off
// map key sorting, leaving every other option as Encode configures it. Construction failure
// panics during package initialization.
var EncodeNondeterministic = EncMode{
	delegate: func() cbor.UserBufferEncMode {
		unsorted := Encode.options()
		unsorted.Sort = cbor.SortNone // TODO: Use cbor.SortFastShuffle after bump to v2.7.0.
		delegate, err := unsorted.UserBufferEncMode()
		if err != nil {
			panic(err)
		}
		return delegate
	}(),
}
// EncMode wraps a cbor.UserBufferEncMode so that this package's encoding entry points can marshal
// into caller-supplied or pooled buffers.
type EncMode struct {
// delegate is the underlying cbor encoding mode that performs the actual marshaling.
delegate cbor.UserBufferEncMode
}
// options returns the cbor.EncOptions reported by the delegate, allowing a new mode to be derived
// from an existing one.
func (em EncMode) options() cbor.EncOptions {
return em.delegate.EncOptions()
}
func (em EncMode) MarshalTo(v interface{}, w io.Writer) error {
if buf, ok := w.(*buffer); ok {
return em.delegate.MarshalToBuffer(v, &buf.Buffer)
}
return encode
}()
var EncodeNondeterministic cbor.EncMode = func() cbor.EncMode {
opts := Encode.EncOptions()
opts.Sort = cbor.SortNone
em, err := opts.EncMode()
if err != nil {
panic(err)
buf := buffers.Get()
defer buffers.Put(buf)
if err := em.delegate.MarshalToBuffer(v, &buf.Buffer); err != nil {
return err
}
return em
}()
if _, err := io.Copy(w, buf); err != nil {
return err
}
return nil
}
// Marshal encodes v and returns the encoded bytes in a newly allocated slice that the caller owns.
//
// The encoding is staged in a pooled buffer, which is handed back to the buffer provider when
// Marshal returns. The result is therefore copied out of the buffer rather than aliasing it. On
// error, the returned slice is nil.
func (em EncMode) Marshal(v interface{}) ([]byte, error) {
	buf := buffers.Get()
	defer buffers.Put(buf)

	// Encode directly into the pooled buffer. Going through MarshalTo with &buf.Buffer would
	// pass a *bytes.Buffer, which does not satisfy MarshalTo's *buffer fast-path assertion, so
	// a second pooled buffer would be borrowed and its contents copied into this one.
	if err := em.delegate.MarshalToBuffer(v, &buf.Buffer); err != nil {
		return nil, err
	}

	// The pooled buffer's backing array is reused after Put, so return a private copy.
	clone := make([]byte, buf.Len())
	copy(clone, buf.Bytes())
	return clone, nil
}

View File

@ -22,6 +22,8 @@ import (
"reflect"
"testing"
"k8s.io/apimachinery/pkg/runtime/serializer/cbor/internal/modes"
"github.com/fxamacker/cbor/v2"
"github.com/google/go-cmp/cmp"
)
@ -35,7 +37,7 @@ func (i int64BinaryMarshaler) MarshalBinary() ([]byte, error) {
func TestEncode(t *testing.T) {
for _, tc := range []struct {
name string
modes []cbor.EncMode
modes []modes.EncMode
in interface{}
want []byte
assertOnError func(t *testing.T, e error)

View File

@ -25,12 +25,12 @@ import (
"k8s.io/apimachinery/pkg/runtime/serializer/cbor/internal/modes"
)
var encModeNames = map[cbor.EncMode]string{
var encModeNames = map[modes.EncMode]string{
modes.Encode: "Encode",
modes.EncodeNondeterministic: "EncodeNondeterministic",
}
var allEncModes = []cbor.EncMode{
var allEncModes = []modes.EncMode{
modes.Encode,
modes.EncodeNondeterministic,
}

View File

@ -24,6 +24,8 @@ import (
"testing"
"time"
"k8s.io/apimachinery/pkg/runtime/serializer/cbor/internal/modes"
"github.com/fxamacker/cbor/v2"
"github.com/google/go-cmp/cmp"
)
@ -35,7 +37,7 @@ func nilPointerFor[T interface{}]() *T {
// TestRoundtrip roundtrips object serialization to interface{} and back via CBOR.
func TestRoundtrip(t *testing.T) {
type modePair struct {
enc cbor.EncMode
enc modes.EncMode
dec cbor.DecMode
}