Enable JSON-compatible base64 encoding of []byte for CBOR.

The encoding/json package marshals []byte to a JSON string containing the base64 encoding of the
input slice's bytes, and unmarshals JSON strings to []byte by assuming the JSON string contains a
valid base64 text.

As a binary format, CBOR is capable of representing arbitrary byte sequences without converting them
to a text encoding, but it also needs to interoperate with the existing JSON serializer. It does
this using the "expected later encoding" tags defined in RFC 8949, which indicate a specific text
encoding to be used when interoperating with text-based protocols. The actual conversion to or from
a text encoding is deferred until necessary, so no conversion is performed during roundtrips of
[]byte to CBOR.
This commit is contained in:
Ben Luddy 2024-06-10 09:50:01 -04:00
parent 85ede67ac9
commit 38f87df0e3
No known key found for this signature in database
GPG Key ID: A6551E73A5974C30
7 changed files with 151 additions and 30 deletions

View File

@ -150,30 +150,6 @@ func TestRoundtripToUnstructured(t *testing.T) {
// These are GVKs whose CBOR roundtrippability is blocked by a known issue that must be
// resolved as a prerequisite for alpha.
knownFailureReasons := map[string][]schema.GroupVersionKind{
// Since JSON cannot directly represent arbitrary byte sequences, a byte slice
// encodes to a JSON string containing the base64 encoding of the slice
// contents. Decoding a JSON string into a byte slice assumes (and requires) that
// the JSON string contain base64-encoded data. The CBOR serializer must be
// compatible with this behavior.
"byte slices should be represented in unstructured as base64-encoded strings": {
{Version: "v1", Kind: "Secret"},
{Version: "v1", Kind: "SecretList"},
{Version: "v1", Kind: "RangeAllocation"},
{Version: "v1", Kind: "ConfigMap"},
{Version: "v1", Kind: "ConfigMapList"},
{Group: "admissionregistration.k8s.io", Version: "v1beta1", Kind: "MutatingWebhookConfiguration"},
{Group: "admissionregistration.k8s.io", Version: "v1beta1", Kind: "MutatingWebhookConfigurationList"},
{Group: "admissionregistration.k8s.io", Version: "v1beta1", Kind: "ValidatingWebhookConfiguration"},
{Group: "admissionregistration.k8s.io", Version: "v1beta1", Kind: "ValidatingWebhookConfigurationList"},
{Group: "admissionregistration.k8s.io", Version: "v1", Kind: "MutatingWebhookConfiguration"},
{Group: "admissionregistration.k8s.io", Version: "v1", Kind: "MutatingWebhookConfigurationList"},
{Group: "admissionregistration.k8s.io", Version: "v1", Kind: "ValidatingWebhookConfiguration"},
{Group: "admissionregistration.k8s.io", Version: "v1", Kind: "ValidatingWebhookConfigurationList"},
{Group: "certificates.k8s.io", Version: "v1beta1", Kind: "CertificateSigningRequest"},
{Group: "certificates.k8s.io", Version: "v1beta1", Kind: "CertificateSigningRequestList"},
{Group: "certificates.k8s.io", Version: "v1", Kind: "CertificateSigningRequest"},
{Group: "certificates.k8s.io", Version: "v1", Kind: "CertificateSigningRequestList"},
},
// If a RawExtension's bytes are invalid JSON, its containing object can't be encoded to JSON.
"rawextension needs to work in programs that assume json": {
{Version: "v1", Kind: "List"},

View File

@ -286,11 +286,11 @@ func TestAppendixA(t *testing.T) {
},
},
{
example: hex("d74401020304"),
decoded: "\x01\x02\x03\x04",
encoded: hex("4401020304"),
example: hex("d74401020304"), // 23(h'01020304')
decoded: "01020304",
encoded: hex("483031303230333034"), // '01020304'
reasons: []string{
reasonTagIgnored,
"decoding a byte string enclosed in an expected later encoding tag into an interface{} value automatically converts to the specified encoding for JSON interoperability",
},
},
{

View File

@ -97,8 +97,12 @@ var Decode cbor.DecMode = func() cbor.DecMode {
// Produce string concrete values when decoding a CBOR byte string into interface{}.
DefaultByteStringType: reflect.TypeOf(""),
// Allow CBOR byte strings to be decoded into string destination values.
ByteStringToString: cbor.ByteStringToStringAllowed,
// Allow CBOR byte strings to be decoded into string destination values. If a byte
// string is enclosed in an "expected later encoding" tag
// (https://www.rfc-editor.org/rfc/rfc8949.html#section-3.4.5.2), then the text
// encoding indicated by that tag (e.g. base64) will be applied to the contents of
// the byte string.
ByteStringToString: cbor.ByteStringToStringAllowedWithExpectedLaterEncoding,
// Allow CBOR byte strings to match struct fields when appearing as a map key.
FieldNameByteString: cbor.FieldNameByteStringAllowed,
@ -119,6 +123,12 @@ var Decode cbor.DecMode = func() cbor.DecMode {
NaN: cbor.NaNDecodeForbidden,
Inf: cbor.InfDecodeForbidden,
// When unmarshaling a byte string into a []byte, assume that the byte string
// contains base64-encoded bytes, unless explicitly counterindicated by an "expected
// later encoding" tag. This is consistent with the behavior of unmarshaling a JSON
// text into a []byte.
ByteStringExpectedFormat: cbor.ByteStringExpectedBase64,
// Reject the arbitrary-precision integer tags because they can't be faithfully
// roundtripped through the allowable Unstructured types.
BignumTag: cbor.BignumTagForbidden,

View File

@ -163,6 +163,46 @@ func TestDecode(t *testing.T) {
want: "",
assertOnError: assertNilError,
},
{
name: "byte string into []byte assumes base64",
in: []byte("\x48AQIDBA=="), // 'AQIDBA=='
into: []byte{},
want: []byte{0x01, 0x02, 0x03, 0x04},
assertOnError: assertNilError,
},
{
name: "byte string into []byte errors on invalid base64",
in: hex("41ff"), // h'ff'
into: []byte{},
assertOnError: assertErrorMessage("cbor: failed to decode base64 from byte string: illegal base64 data at input byte 0"),
},
{
name: "empty byte string into []byte assumes base64",
in: hex("40"), // ''
into: []byte{},
want: []byte{},
assertOnError: assertNilError,
},
{
name: "byte string with expected encoding tag into []byte does not convert",
in: hex("d64401020304"), // 22(h'01020304')
into: []byte{},
want: []byte{0x01, 0x02, 0x03, 0x04},
assertOnError: assertNilError,
},
{
name: "byte string with expected encoding tag into string converts",
in: hex("d64401020304"), // 22(h'01020304')
into: "",
want: "AQIDBA==",
assertOnError: assertNilError,
},
{
name: "byte string with expected encoding tag into interface{} converts",
in: hex("d64401020304"), // 22(h'01020304')
want: "AQIDBA==",
assertOnError: assertNilError,
},
})
group(t, "text string", []test{

View File

@ -79,6 +79,14 @@ var Encode cbor.EncMode = func() cbor.EncMode {
// Marshal Go byte arrays to CBOR arrays of integers (as in JSON) instead of byte
// strings.
ByteArray: cbor.ByteArrayToArray,
// Marshal []byte to CBOR byte string enclosed in tag 22 (expected later base64
// encoding, https://www.rfc-editor.org/rfc/rfc8949.html#section-3.4.5.2), to
// interoperate with the existing JSON behavior. This indicates to the decoder that,
// when decoding into a string (or unstructured), the resulting value should be the
// base64 encoding of the original bytes. No base64 encoding or decoding needs to be
// performed for []byte-to-CBOR-to-[]byte roundtrips.
ByteSliceLaterFormat: cbor.ByteSliceLaterFormatBase64,
}.EncMode()
if err != nil {
panic(err)

View File

@ -70,6 +70,18 @@ func TestEncode(t *testing.T) {
want: []byte{0x83, 0x01, 0x02, 0x03}, // [1, 2, 3]
assertOnError: assertNilError,
},
{
name: "string marshalled to byte string",
in: "hello",
want: []byte{0x45, 'h', 'e', 'l', 'l', 'o'},
assertOnError: assertNilError,
},
{
name: "[]byte marshalled to byte string in expected base64 encoding tag",
in: []byte("hello"),
want: []byte{0xd6, 0x45, 'h', 'e', 'l', 'l', 'o'},
assertOnError: assertNilError,
},
} {
encModes := tc.modes
if len(encModes) == 0 {

View File

@ -17,6 +17,7 @@ limitations under the License.
package modes_test
import (
"encoding/base64"
"fmt"
"math"
"reflect"
@ -340,3 +341,77 @@ func TestRoundtrip(t *testing.T) {
}
}
}
// TestRoundtripTextEncoding checks, for every pairing of an encode mode with a decode mode, what a
// value becomes when it is marshalled from either a []byte or a string and then unmarshalled into
// interface{}, string, and []byte destinations.
func TestRoundtripTextEncoding(t *testing.T) {
	for _, enc := range allEncModes {
		for _, dec := range allDecModes {
			byteSliceName := fmt.Sprintf("enc=%s/dec=%s/byte slice", encModeNames[enc], decModeNames[dec])
			t.Run(byteSliceName, func(t *testing.T) {
				raw := []byte("foo")
				// Both text-like destinations are expected to hold the base64 text of raw.
				wantText := base64.StdEncoding.EncodeToString(raw)

				encoded, err := enc.Marshal(raw)
				if err != nil {
					t.Fatal(err)
				}

				// Destination 1: an untyped interface{} value.
				var asInterface interface{}
				err = dec.Unmarshal(encoded, &asInterface)
				if err != nil {
					t.Fatal(err)
				}
				if delta := cmp.Diff(wantText, asInterface); delta != "" {
					t.Errorf("[]byte to interface{}: unexpected diff:\n%s", delta)
				}

				// Destination 2: a string.
				var asString string
				err = dec.Unmarshal(encoded, &asString)
				if err != nil {
					t.Fatal(err)
				}
				if delta := cmp.Diff(wantText, asString); delta != "" {
					t.Errorf("[]byte to string: unexpected diff:\n%s", delta)
				}

				// Destination 3: a []byte, which must equal the bytes that were marshalled.
				var asBytes []byte
				err = dec.Unmarshal(encoded, &asBytes)
				if err != nil {
					t.Fatal(err)
				}
				if delta := cmp.Diff(raw, asBytes); delta != "" {
					t.Errorf("[]byte to []byte: unexpected diff:\n%s", delta)
				}
			})

			stringName := fmt.Sprintf("enc=%s/dec=%s/string", encModeNames[enc], decModeNames[dec])
			t.Run(stringName, func(t *testing.T) {
				// The marshalled input is a string holding the base64 text of wantBytes.
				wantBytes := []byte("foo")
				text := base64.StdEncoding.EncodeToString(wantBytes) // "Zm9v"

				encoded, err := enc.Marshal(text)
				if err != nil {
					t.Fatal(err)
				}

				// Destination 1: an untyped interface{} value, expected to equal the input string.
				var asInterface interface{}
				err = dec.Unmarshal(encoded, &asInterface)
				if err != nil {
					t.Fatal(err)
				}
				if delta := cmp.Diff(text, asInterface); delta != "" {
					t.Errorf("string to interface{}: unexpected diff:\n%s", delta)
				}

				// Destination 2: a []byte, expected to hold the base64-decoded bytes.
				var asBytes []byte
				err = dec.Unmarshal(encoded, &asBytes)
				if err != nil {
					t.Fatal(err)
				}
				if delta := cmp.Diff(wantBytes, asBytes); delta != "" {
					t.Errorf("string to []byte: unexpected diff:\n%s", delta)
				}

				// Destination 3: a string, expected to equal the input string.
				var asString string
				err = dec.Unmarshal(encoded, &asString)
				if err != nil {
					t.Fatal(err)
				}
				if delta := cmp.Diff(text, asString); delta != "" {
					t.Errorf("string to string: unexpected diff:\n%s", delta)
				}
			})
		}
	}
}