From ae2319b6dbeb388a9b1515f53d0c73ad499f52ed Mon Sep 17 00:00:00 2001 From: Kermit Alexander Date: Thu, 14 Jul 2022 17:16:08 +0000 Subject: [PATCH] Replace estimateMinSizeJSON with DeclType.MinSerializedSize. --- .../pkg/apiserver/schema/cel/compilation.go | 2 +- .../forked/celopenapi/model/schemas.go | 135 +++++++----------- .../forked/celopenapi/model/schemas_test.go | 29 ++++ .../forked/celopenapi/model/types.go | 65 +++++---- .../forked/celopenapi/model/value.go | 2 +- 5 files changed, 122 insertions(+), 111 deletions(-) diff --git a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation.go b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation.go index 7f226d7379f..998a29ba655 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation.go +++ b/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation.go @@ -141,7 +141,7 @@ func Compile(s *schema.Structural, declType *celmodel.DeclType, perCallLimit uin estimator := newCostEstimator(root) // compResults is the return value which saves a list of compilation results in the same order as x-kubernetes-validations rules. 
compResults := make([]CompilationResult, len(celRules)) - maxCardinality := celmodel.MaxCardinality(s) + maxCardinality := celmodel.MaxCardinality(root.MinSerializedSize) for i, rule := range celRules { compResults[i] = compileRule(rule, env, perCallLimit, estimator, maxCardinality) } diff --git a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas.go b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas.go index 9f17b5ce50d..0a907519b5b 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas.go +++ b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas.go @@ -44,6 +44,12 @@ const ( // RFC 3339 datetimes require a full date (YYYY-MM-DD) and full time (HH:MM:SS), and we add 3 for // quotation marks like always in addition to the capital T that separates the date and time minDatetimeSizeJSON = 21 + // "" + minStringSize = 2 + // true + minBoolSize = 4 + // 0 + minNumberSize = 1 ) // SchemaDeclType converts the structural schema to a CEL declaration, or returns nil if the @@ -72,8 +78,8 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType { // To validate requirements on both the int and string representation: // `type(intOrStringField) == int ? 
intOrStringField < 5 : double(intOrStringField.replace('%', '')) < 0.5 // - dyn := newSimpleType("dyn", cel.DynType, nil) - // handle x-kubernetes-int-or-string by returning the max length of the largest possible string + dyn := newSimpleTypeWithMinSize("dyn", cel.DynType, nil, 1) // smallest value for a serialized x-kubernetes-int-or-string is 0 + // handle x-kubernetes-int-or-string by returning the max length/min serialized size of the largest possible string dyn.MaxElements = maxRequestSizeBytes - 2 return dyn } @@ -92,15 +98,16 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType { case "array": if s.Items != nil { itemsType := SchemaDeclType(s.Items, s.Items.XEmbeddedResource) + if itemsType == nil { + return nil + } var maxItems int64 if s.ValueValidation != nil && s.ValueValidation.MaxItems != nil { maxItems = zeroIfNegative(*s.ValueValidation.MaxItems) } else { - maxItems = estimateMaxArrayItemsPerRequest(s.Items) - } - if itemsType != nil { - return NewListType(itemsType, maxItems) + maxItems = estimateMaxArrayItemsFromMinSize(itemsType.MinSerializedSize) } + return NewListType(itemsType, maxItems) } return nil case "object": @@ -111,7 +118,7 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType { if s.ValueValidation != nil && s.ValueValidation.MaxProperties != nil { maxProperties = zeroIfNegative(*s.ValueValidation.MaxProperties) } else { - maxProperties = estimateMaxAdditionalPropertiesPerRequest(s.AdditionalProperties.Structural) + maxProperties = estimateMaxAdditionalPropertiesFromMinSize(propsType.MinSerializedSize) } return NewMapType(StringType, propsType, maxProperties) } @@ -125,6 +132,8 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType { required[f] = true } } + // an object will always be serialized at least as {}, so account for that + minSerializedSize := int64(2) for name, prop := range s.Properties { var enumValues []interface{} if prop.ValueValidation != nil { @@ 
-142,14 +151,23 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType { enumValues: enumValues, // Enum values are represented as strings in CEL } } + // the min serialized size for an object is 2 (for {}) plus the min size of all its required + // properties + // only include required properties without a default value; default values are filled in + // server-side + if required[name] && prop.Default.Object == nil { + minSerializedSize += int64(len(name)) + fieldType.MinSerializedSize + 4 + } } } - return NewObjectType("object", fields) + objType := NewObjectType("object", fields) + objType.MinSerializedSize = minSerializedSize + return objType case "string": if s.ValueValidation != nil { switch s.ValueValidation.Format { case "byte": - byteWithMaxLength := newSimpleType("bytes", cel.BytesType, types.Bytes([]byte{})) + byteWithMaxLength := newSimpleTypeWithMinSize("bytes", cel.BytesType, types.Bytes([]byte{}), minStringSize) if s.ValueValidation.MaxLength != nil { byteWithMaxLength.MaxElements = zeroIfNegative(*s.ValueValidation.MaxLength) } else { @@ -157,16 +175,20 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType { } return byteWithMaxLength case "duration": - durationWithMaxLength := newSimpleType("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)}) + durationWithMaxLength := newSimpleTypeWithMinSize("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)}, int64(minDurationSizeJSON)) durationWithMaxLength.MaxElements = estimateMaxStringLengthPerRequest(s) return durationWithMaxLength - case "date", "date-time": - timestampWithMaxLength := newSimpleType("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}) + case "date": + timestampWithMaxLength := newSimpleTypeWithMinSize("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}, int64(dateSizeJSON)) + timestampWithMaxLength.MaxElements = estimateMaxStringLengthPerRequest(s) + return 
timestampWithMaxLength + case "date-time": + timestampWithMaxLength := newSimpleTypeWithMinSize("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}, int64(minDatetimeSizeJSON)) timestampWithMaxLength.MaxElements = estimateMaxStringLengthPerRequest(s) return timestampWithMaxLength } } - strWithMaxLength := newSimpleType("string", cel.StringType, types.String("")) + strWithMaxLength := newSimpleTypeWithMinSize("string", cel.StringType, types.String(""), minStringSize) if s.ValueValidation != nil && s.ValueValidation.MaxLength != nil { // multiply the user-provided max length by 4 in the case of an otherwise-untyped string // we do this because the OpenAPIv3 spec indicates that maxLength is specified in runes/code points, @@ -230,81 +252,17 @@ func WithTypeAndObjectMeta(s *schema.Structural) *schema.Structural { return result } -// MaxCardinality returns the maximum number of times data conforming to the schema could possibly exist in +// MaxCardinality returns the maximum number of times data conforming to the minimum size given could possibly exist in // an object serialized to JSON. For cases where a schema is contained under map or array schemas of unbounded // size, this can be used as an estimate as the worst case number of times data matching the schema could be repeated. // Note that this only assumes a single comma between data elements, so if the schema is contained under only maps, -// this estimates a higher cardinality that would be possible. -func MaxCardinality(s *schema.Structural) uint64 { - sz := estimateMinSizeJSON(s) + 1 // assume at least one comma between elements +// this estimates a higher cardinality than would be possible. DeclType.MinSerializedSize is meant to be passed to +// this function. 
+func MaxCardinality(minSize int64) uint64 { + sz := minSize + 1 // assume at least one comma between elements return uint64(maxRequestSizeBytes / sz) } -// estimateMinSizeJSON estimates the minimum size in bytes of the given schema when serialized in JSON. -// minLength/minProperties/minItems are not currently taken into account, so if these limits are set the -// minimum size might be higher than what estimateMinSizeJSON returns. -func estimateMinSizeJSON(s *schema.Structural) int64 { - if s == nil { - // minimum valid JSON token has length 1 (single-digit number like `0`) - return 1 - } - switch s.Type { - case "boolean": - // true - return 4 - case "number", "integer": - // 0 - return 1 - case "string": - if s.ValueValidation != nil { - switch s.ValueValidation.Format { - case "duration": - return minDurationSizeJSON - case "date": - return dateSizeJSON - case "date-time": - return minDatetimeSizeJSON - } - } - // "" - return 2 - case "array": - // [] - return 2 - case "object": - // {} - objSize := int64(2) - // exclude optional fields since the request can omit them - if s.ValueValidation != nil { - for _, propName := range s.ValueValidation.Required { - if prop, ok := s.Properties[propName]; ok { - if prop.Default.Object != nil { - // exclude fields with a default, those are filled in server-side - continue - } - // add 4, 2 for quotations around the property name, 1 for the colon, and 1 for a comma - objSize += int64(len(propName)) + estimateMinSizeJSON(&prop) + 4 - } - } - } - return objSize - } - if s.XIntOrString { - // 0 - return 1 - } - // this code should be unreachable, so return the safest possible value considering this can be used as - // a divisor - return 1 -} - -// estimateMaxArrayItemsPerRequest estimates the maximum number of array items with -// the provided schema that can fit into a single request. 
-func estimateMaxArrayItemsPerRequest(itemSchema *schema.Structural) int64 { - // subtract 2 to account for [ and ] - return (maxRequestSizeBytes - 2) / (estimateMinSizeJSON(itemSchema) + 1) -} - // estimateMaxStringLengthPerRequest estimates the maximum string length (in characters) // of a string compatible with the format requirements in the provided schema. // must only be called on schemas of type "string" or x-kubernetes-int-or-string: true @@ -326,12 +284,19 @@ func estimateMaxStringLengthPerRequest(s *schema.Structural) int64 { } } +// estimateMaxArrayItemsFromMinSize estimates the maximum number of array items with +// the provided minimum serialized size that can fit into a single request. +func estimateMaxArrayItemsFromMinSize(minSize int64) int64 { + // subtract 2 to account for [ and ] + return (maxRequestSizeBytes - 2) / (minSize + 1) +} + // estimateMaxAdditionalPropertiesPerRequest estimates the maximum number of additional properties -// with the provided schema that can fit into a single request. -func estimateMaxAdditionalPropertiesPerRequest(additionalPropertiesSchema *schema.Structural) int64 { +// with the provided minimum serialized size that can fit into a single request. 
+func estimateMaxAdditionalPropertiesFromMinSize(minSize int64) int64 { // 2 bytes for key + "" + colon + comma + smallest possible value, realistically the actual keys // will all vary in length - keyValuePairSize := estimateMinSizeJSON(additionalPropertiesSchema) + 6 + keyValuePairSize := minSize + 6 // subtract 2 to account for { and } return (maxRequestSizeBytes - 2) / keyValuePairSize } diff --git a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas_test.go b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas_test.go index 6aff03d2e3d..138b7f6fd1f 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas_test.go +++ b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/schemas_test.go @@ -510,3 +510,32 @@ func TestEstimateMaxLengthJSON(t *testing.T) { func maxPtr(max int64) *int64 { return &max } + +func genNestedSchema(depth int) *schema.Structural { + var generator func(d int) schema.Structural + generator = func(d int) schema.Structural { + nodeTemplate := schema.Structural{ + Generic: schema.Generic{ + Type: "object", + AdditionalProperties: &schema.StructuralOrBool{}, + }, + } + if d == 1 { + return nodeTemplate + } else { + mapType := generator(d - 1) + nodeTemplate.Generic.AdditionalProperties.Structural = &mapType + return nodeTemplate + } + } + schema := generator(depth) + return &schema +} + +func BenchmarkDeeplyNestedSchemaDeclType(b *testing.B) { + benchmarkSchema := genNestedSchema(10) + b.ResetTimer() + for i := 0; i < b.N; i++ { + SchemaDeclType(benchmarkSchema, false) + } +} diff --git a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/types.go b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/types.go index 621b77f41da..7c77a3247dd 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/types.go +++ 
b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/types.go @@ -40,6 +40,9 @@ func NewListType(elem *DeclType, maxItems int64) *DeclType { MaxElements: maxItems, celType: cel.ListType(elem.CelType()), defaultValue: NewListValue(), + // a list can always be represented as [] in JSON, so hardcode the min size + // to 2 + MinSerializedSize: 2, } } @@ -52,6 +55,9 @@ func NewMapType(key, elem *DeclType, maxProperties int64) *DeclType { MaxElements: maxProperties, celType: cel.MapType(key.CelType(), elem.CelType()), defaultValue: NewMapValue(), + // a map can always be represented as {} in JSON, so hardcode the min size + // to 2 + MinSerializedSize: 2, } } @@ -62,16 +68,21 @@ func NewObjectType(name string, fields map[string]*DeclField) *DeclType { Fields: fields, celType: cel.ObjectType(name), traitMask: traits.FieldTesterType | traits.IndexerType, + // an object could potentially be larger than the min size we default to here ({}), + // but we rely upon the caller to change MinSerializedSize accordingly if they add + // properties to the object + MinSerializedSize: 2, } t.defaultValue = NewObjectValue(t) return t } -func newSimpleType(name string, celType *cel.Type, zeroVal ref.Val) *DeclType { +func newSimpleTypeWithMinSize(name string, celType *cel.Type, zeroVal ref.Val, minSize int64) *DeclType { return &DeclType{ - name: name, - celType: celType, + name: name, + celType: celType, defaultValue: zeroVal, + MinSerializedSize: minSize, } } @@ -87,6 +98,9 @@ type DeclType struct { TypeParam bool Metadata map[string]string MaxElements int64 + // MinSerializedSize represents the smallest possible size in bytes that + // the DeclType could be serialized to in JSON. 
+ MinSerializedSize int64 celType *cel.Type traitMask int @@ -127,15 +141,16 @@ func (t *DeclType) MaybeAssignTypeName(name string) *DeclType { return t } return &DeclType{ - name: name, - Fields: fieldMap, - KeyType: t.KeyType, - ElemType: t.ElemType, - TypeParam: t.TypeParam, - Metadata: t.Metadata, - celType: cel.ObjectType(name), - traitMask: t.traitMask, - defaultValue: t.defaultValue, + name: name, + Fields: fieldMap, + KeyType: t.KeyType, + ElemType: t.ElemType, + TypeParam: t.TypeParam, + Metadata: t.Metadata, + celType: cel.ObjectType(name), + traitMask: t.traitMask, + defaultValue: t.defaultValue, + MinSerializedSize: t.MinSerializedSize, } } if t.IsMap() { @@ -507,42 +522,44 @@ type schemaTypeProvider struct { var ( // AnyType is equivalent to the CEL 'protobuf.Any' type in that the value may have any of the // types supported. - AnyType = newSimpleType("any", cel.AnyType, nil) + AnyType = newSimpleTypeWithMinSize("any", cel.AnyType, nil, 1) // BoolType is equivalent to the CEL 'bool' type. - BoolType = newSimpleType("bool", cel.BoolType, types.False) + BoolType = newSimpleTypeWithMinSize("bool", cel.BoolType, types.False, minBoolSize) // BytesType is equivalent to the CEL 'bytes' type. - BytesType = newSimpleType("bytes", cel.BytesType, types.Bytes([]byte{})) + BytesType = newSimpleTypeWithMinSize("bytes", cel.BytesType, types.Bytes([]byte{}), minStringSize) // DoubleType is equivalent to the CEL 'double' type which is a 64-bit floating point value. - DoubleType = newSimpleType("double", cel.DoubleType, types.Double(0)) + DoubleType = newSimpleTypeWithMinSize("double", cel.DoubleType, types.Double(0), minNumberSize) // DurationType is equivalent to the CEL 'duration' type. 
- DurationType = newSimpleType("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)}) + DurationType = newSimpleTypeWithMinSize("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)}, minDurationSizeJSON) // DateType is equivalent to the CEL 'date' type. - DateType = newSimpleType("date", cel.TimestampType, types.Timestamp{Time: time.Time{}}) + DateType = newSimpleTypeWithMinSize("date", cel.TimestampType, types.Timestamp{Time: time.Time{}}, dateSizeJSON) // DynType is the equivalent of the CEL 'dyn' concept which indicates that the type will be // determined at runtime rather than compile time. - DynType = newSimpleType("dyn", cel.DynType, nil) + DynType = newSimpleTypeWithMinSize("dyn", cel.DynType, nil, 1) // IntType is equivalent to the CEL 'int' type which is a 64-bit signed int. - IntType = newSimpleType("int", cel.IntType, types.IntZero) + IntType = newSimpleTypeWithMinSize("int", cel.IntType, types.IntZero, minNumberSize) // NullType is equivalent to the CEL 'null_type'. - NullType = newSimpleType("null_type", cel.NullType, types.NullValue) + NullType = newSimpleTypeWithMinSize("null_type", cel.NullType, types.NullValue, 4) // StringType is equivalent to the CEL 'string' type which is expected to be a UTF-8 string. // StringType values may either be string literals or expression strings. - StringType = newSimpleType("string", cel.StringType, types.String("")) + StringType = newSimpleTypeWithMinSize("string", cel.StringType, types.String(""), minStringSize) // TimestampType corresponds to the well-known protobuf.Timestamp type supported within CEL. - TimestampType = newSimpleType("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}) + // Note that both the OpenAPI date and date-time types map onto TimestampType, so not all types + // labeled as Timestamp will necessarily have the same MinSerializedSize. 
+ TimestampType = newSimpleTypeWithMinSize("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}, dateSizeJSON) // UintType is equivalent to the CEL 'uint' type. - UintType = newSimpleType("uint", cel.UintType, types.Uint(0)) + UintType = newSimpleTypeWithMinSize("uint", cel.UintType, types.Uint(0), 1) // ListType is equivalent to the CEL 'list' type. ListType = NewListType(AnyType, noMaxLength) diff --git a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/value.go b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/value.go index 79817bead74..9a537aba97f 100644 --- a/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/value.go +++ b/staging/src/k8s.io/apiextensions-apiserver/third_party/forked/celopenapi/model/value.go @@ -766,4 +766,4 @@ func celBool(pred bool) ref.Val { return types.False } -var unknownType = &DeclType{name: "unknown"} +var unknownType = &DeclType{name: "unknown", MinSerializedSize: 1}