Merge pull request #111156 from DangerOnTheRanger/cel-traversal-optimization

Remove estimateMinSizeJSON calls for CEL
This commit is contained in:
Kubernetes Prow Robot 2022-07-25 18:33:53 -07:00 committed by GitHub
commit 8f30a3bb29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 122 additions and 111 deletions

View File

@ -141,7 +141,7 @@ func Compile(s *schema.Structural, declType *celmodel.DeclType, perCallLimit uin
estimator := newCostEstimator(root)
// compResults is the return value which saves a list of compilation results in the same order as x-kubernetes-validations rules.
compResults := make([]CompilationResult, len(celRules))
maxCardinality := celmodel.MaxCardinality(s)
maxCardinality := celmodel.MaxCardinality(root.MinSerializedSize)
for i, rule := range celRules {
compResults[i] = compileRule(rule, env, perCallLimit, estimator, maxCardinality)
}

View File

@ -44,6 +44,12 @@ const (
// RFC 3339 datetimes require a full date (YYYY-MM-DD) and full time (HH:MM:SS), and we add 3 for
// quotation marks like always in addition to the capital T that separates the date and time
minDatetimeSizeJSON = 21
// ""
minStringSize = 2
// true
minBoolSize = 4
// 0
minNumberSize = 1
)
// SchemaDeclType converts the structural schema to a CEL declaration, or returns nil if the
@ -72,8 +78,8 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType {
// To validate requirements on both the int and string representation:
// `type(intOrStringField) == int ? intOrStringField < 5 : double(intOrStringField.replace('%', '')) < 0.5
//
dyn := newSimpleType("dyn", cel.DynType, nil)
// handle x-kubernetes-int-or-string by returning the max length of the largest possible string
dyn := newSimpleTypeWithMinSize("dyn", cel.DynType, nil, 1) // smallest value for a serialized x-kubernetes-int-or-string is 0
// handle x-kubernetes-int-or-string by returning the max length/min serialized size of the largest possible string
dyn.MaxElements = maxRequestSizeBytes - 2
return dyn
}
@ -92,15 +98,16 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType {
case "array":
if s.Items != nil {
itemsType := SchemaDeclType(s.Items, s.Items.XEmbeddedResource)
if itemsType == nil {
return nil
}
var maxItems int64
if s.ValueValidation != nil && s.ValueValidation.MaxItems != nil {
maxItems = zeroIfNegative(*s.ValueValidation.MaxItems)
} else {
maxItems = estimateMaxArrayItemsPerRequest(s.Items)
}
if itemsType != nil {
return NewListType(itemsType, maxItems)
maxItems = estimateMaxArrayItemsFromMinSize(itemsType.MinSerializedSize)
}
return NewListType(itemsType, maxItems)
}
return nil
case "object":
@ -111,7 +118,7 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType {
if s.ValueValidation != nil && s.ValueValidation.MaxProperties != nil {
maxProperties = zeroIfNegative(*s.ValueValidation.MaxProperties)
} else {
maxProperties = estimateMaxAdditionalPropertiesPerRequest(s.AdditionalProperties.Structural)
maxProperties = estimateMaxAdditionalPropertiesFromMinSize(propsType.MinSerializedSize)
}
return NewMapType(StringType, propsType, maxProperties)
}
@ -125,6 +132,8 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType {
required[f] = true
}
}
// an object will always be serialized at least as {}, so account for that
minSerializedSize := int64(2)
for name, prop := range s.Properties {
var enumValues []interface{}
if prop.ValueValidation != nil {
@ -142,14 +151,23 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType {
enumValues: enumValues, // Enum values are represented as strings in CEL
}
}
// the min serialized size for an object is 2 (for {}) plus the min size of all its required
// properties
// only include required properties without a default value; default values are filled in
// server-side
if required[name] && prop.Default.Object == nil {
minSerializedSize += int64(len(name)) + fieldType.MinSerializedSize + 4
}
}
}
return NewObjectType("object", fields)
objType := NewObjectType("object", fields)
objType.MinSerializedSize = minSerializedSize
return objType
case "string":
if s.ValueValidation != nil {
switch s.ValueValidation.Format {
case "byte":
byteWithMaxLength := newSimpleType("bytes", cel.BytesType, types.Bytes([]byte{}))
byteWithMaxLength := newSimpleTypeWithMinSize("bytes", cel.BytesType, types.Bytes([]byte{}), minStringSize)
if s.ValueValidation.MaxLength != nil {
byteWithMaxLength.MaxElements = zeroIfNegative(*s.ValueValidation.MaxLength)
} else {
@ -157,16 +175,20 @@ func SchemaDeclType(s *schema.Structural, isResourceRoot bool) *DeclType {
}
return byteWithMaxLength
case "duration":
durationWithMaxLength := newSimpleType("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)})
durationWithMaxLength := newSimpleTypeWithMinSize("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)}, int64(minDurationSizeJSON))
durationWithMaxLength.MaxElements = estimateMaxStringLengthPerRequest(s)
return durationWithMaxLength
case "date", "date-time":
timestampWithMaxLength := newSimpleType("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}})
case "date":
timestampWithMaxLength := newSimpleTypeWithMinSize("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}, int64(dateSizeJSON))
timestampWithMaxLength.MaxElements = estimateMaxStringLengthPerRequest(s)
return timestampWithMaxLength
case "date-time":
timestampWithMaxLength := newSimpleTypeWithMinSize("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}, int64(minDatetimeSizeJSON))
timestampWithMaxLength.MaxElements = estimateMaxStringLengthPerRequest(s)
return timestampWithMaxLength
}
}
strWithMaxLength := newSimpleType("string", cel.StringType, types.String(""))
strWithMaxLength := newSimpleTypeWithMinSize("string", cel.StringType, types.String(""), minStringSize)
if s.ValueValidation != nil && s.ValueValidation.MaxLength != nil {
// multiply the user-provided max length by 4 in the case of an otherwise-untyped string
// we do this because the OpenAPIv3 spec indicates that maxLength is specified in runes/code points,
@ -230,81 +252,17 @@ func WithTypeAndObjectMeta(s *schema.Structural) *schema.Structural {
return result
}
// MaxCardinality returns the maximum number of times data conforming to the schema could possibly exist in
// MaxCardinality returns the maximum number of times data conforming to the minimum size given could possibly exist in
// an object serialized to JSON. For cases where a schema is contained under map or array schemas of unbounded
// size, this can be used as an estimate as the worst case number of times data matching the schema could be repeated.
// Note that this only assumes a single comma between data elements, so if the schema is contained under only maps,
// this estimates a higher cardinality than would be possible.
func MaxCardinality(s *schema.Structural) uint64 {
sz := estimateMinSizeJSON(s) + 1 // assume at least one comma between elements
// this estimates a higher cardinality than would be possible. DeclType.MinSerializedSize is meant to be passed to
// this function.
func MaxCardinality(minSize int64) uint64 {
sz := minSize + 1 // assume at least one comma between elements
return uint64(maxRequestSizeBytes / sz)
}
// estimateMinSizeJSON returns a lower bound, in bytes, on the JSON serialization of
// a value conforming to the given schema. minLength/minProperties/minItems are not
// taken into account, so when those limits are set the true minimum may be larger
// than the value returned here.
func estimateMinSizeJSON(s *schema.Structural) int64 {
	// The shortest valid JSON token is a single-digit number such as `0`. It is also
	// the safest answer for anything unrecognized, since callers may divide by the result.
	const minTokenSize = 1

	if s == nil {
		return minTokenSize
	}

	switch s.Type {
	case "boolean":
		return 4 // true
	case "number", "integer":
		return minTokenSize // 0
	case "array":
		return 2 // []
	case "string":
		if vv := s.ValueValidation; vv != nil {
			switch vv.Format {
			case "duration":
				return minDurationSizeJSON
			case "date":
				return dateSizeJSON
			case "date-time":
				return minDatetimeSizeJSON
			}
		}
		return 2 // ""
	case "object":
		size := int64(2) // {}
		if s.ValueValidation == nil {
			return size
		}
		// Only required properties count; optional ones can be omitted from the request.
		for _, name := range s.ValueValidation.Required {
			prop, found := s.Properties[name]
			// Properties with a default are filled in server-side, so they add nothing either.
			if !found || prop.Default.Object != nil {
				continue
			}
			// 4 extra bytes: 2 for the quotes around the name, 1 for the colon, 1 for a comma.
			size += int64(len(name)) + estimateMinSizeJSON(&prop) + 4
		}
		return size
	}

	// Untyped schemas land here. An x-kubernetes-int-or-string value can be as short as `0`,
	// and any other case should be unreachable, so both resolve to the minimum token size.
	return minTokenSize
}
// estimateMaxArrayItemsPerRequest returns an upper bound on how many array items
// conforming to itemSchema can fit into a single request.
func estimateMaxArrayItemsPerRequest(itemSchema *schema.Structural) int64 {
	// each item takes at least its minimum serialized size plus one separating comma
	perItemSize := estimateMinSizeJSON(itemSchema) + 1
	// the enclosing [ and ] use up 2 bytes of the request
	return (maxRequestSizeBytes - 2) / perItemSize
}
// estimateMaxStringLengthPerRequest estimates the maximum string length (in characters)
// of a string compatible with the format requirements in the provided schema.
// must only be called on schemas of type "string" or x-kubernetes-int-or-string: true
@ -326,12 +284,19 @@ func estimateMaxStringLengthPerRequest(s *schema.Structural) int64 {
}
}
// estimateMaxArrayItemsFromMinSize estimates the maximum number of array items that
// can fit into a single request, given the minimum serialized size in bytes of one item.
func estimateMaxArrayItemsFromMinSize(minSize int64) int64 {
	// each item occupies at least minSize bytes plus one separating comma
	perItemSize := minSize + 1
	// subtract 2 to account for [ and ]
	return (maxRequestSizeBytes - 2) / perItemSize
}
// estimateMaxAdditionalPropertiesPerRequest estimates the maximum number of additional properties
// with the provided schema that can fit into a single request.
func estimateMaxAdditionalPropertiesPerRequest(additionalPropertiesSchema *schema.Structural) int64 {
// with the provided minimum serialized size that can fit into a single request.
func estimateMaxAdditionalPropertiesFromMinSize(minSize int64) int64 {
// 2 bytes for key + "" + colon + comma + smallest possible value, realistically the actual keys
// will all vary in length
keyValuePairSize := estimateMinSizeJSON(additionalPropertiesSchema) + 6
keyValuePairSize := minSize + 6
// subtract 2 to account for { and }
return (maxRequestSizeBytes - 2) / keyValuePairSize
}

View File

@ -510,3 +510,32 @@ func TestEstimateMaxLengthJSON(t *testing.T) {
// maxPtr returns a pointer to a fresh int64 holding the given value.
func maxPtr(max int64) *int64 {
	value := max
	return &value
}
// genNestedSchema builds a chain of nested "object" schemas, each level holding the
// next one as the schema of its AdditionalProperties, and returns the outermost level.
// The innermost level has an empty AdditionalProperties with no nested schema.
// A depth below 1 is treated as 1, so the function always terminates and never
// returns nil.
func genNestedSchema(depth int) *schema.Structural {
	// newNode returns a fresh object schema with nothing nested under it yet.
	newNode := func() *schema.Structural {
		return &schema.Structural{
			Generic: schema.Generic{
				Type:                 "object",
				AdditionalProperties: &schema.StructuralOrBool{},
			},
		}
	}

	// Build from the innermost level outwards; iterating avoids the unbounded
	// recursion a non-positive depth would otherwise cause.
	nested := newNode()
	for level := 1; level < depth; level++ {
		parent := newNode()
		parent.Generic.AdditionalProperties.Structural = nested
		nested = parent
	}
	return nested
}
// BenchmarkDeeplyNestedSchemaDeclType measures SchemaDeclType on a schema generated
// by genNestedSchema with depth 10. The schema is built before the timer is reset,
// so its construction is excluded from the measurement.
func BenchmarkDeeplyNestedSchemaDeclType(b *testing.B) {
	const nestingDepth = 10
	nested := genNestedSchema(nestingDepth)

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		SchemaDeclType(nested, false)
	}
}

View File

@ -40,6 +40,9 @@ func NewListType(elem *DeclType, maxItems int64) *DeclType {
MaxElements: maxItems,
celType: cel.ListType(elem.CelType()),
defaultValue: NewListValue(),
// a list can always be represented as [] in JSON, so hardcode the min size
// to 2
MinSerializedSize: 2,
}
}
@ -52,6 +55,9 @@ func NewMapType(key, elem *DeclType, maxProperties int64) *DeclType {
MaxElements: maxProperties,
celType: cel.MapType(key.CelType(), elem.CelType()),
defaultValue: NewMapValue(),
// a map can always be represented as {} in JSON, so hardcode the min size
// to 2
MinSerializedSize: 2,
}
}
@ -62,16 +68,21 @@ func NewObjectType(name string, fields map[string]*DeclField) *DeclType {
Fields: fields,
celType: cel.ObjectType(name),
traitMask: traits.FieldTesterType | traits.IndexerType,
// an object could potentially be larger than the min size we default to here ({}),
// but we rely upon the caller to change MinSerializedSize accordingly if they add
// properties to the object
MinSerializedSize: 2,
}
t.defaultValue = NewObjectValue(t)
return t
}
func newSimpleType(name string, celType *cel.Type, zeroVal ref.Val) *DeclType {
func newSimpleTypeWithMinSize(name string, celType *cel.Type, zeroVal ref.Val, minSize int64) *DeclType {
return &DeclType{
name: name,
celType: celType,
name: name,
celType: celType,
defaultValue: zeroVal,
MinSerializedSize: minSize,
}
}
@ -87,6 +98,9 @@ type DeclType struct {
TypeParam bool
Metadata map[string]string
MaxElements int64
// MinSerializedSize represents the smallest possible size in bytes that
// the DeclType could be serialized to in JSON.
MinSerializedSize int64
celType *cel.Type
traitMask int
@ -127,15 +141,16 @@ func (t *DeclType) MaybeAssignTypeName(name string) *DeclType {
return t
}
return &DeclType{
name: name,
Fields: fieldMap,
KeyType: t.KeyType,
ElemType: t.ElemType,
TypeParam: t.TypeParam,
Metadata: t.Metadata,
celType: cel.ObjectType(name),
traitMask: t.traitMask,
defaultValue: t.defaultValue,
name: name,
Fields: fieldMap,
KeyType: t.KeyType,
ElemType: t.ElemType,
TypeParam: t.TypeParam,
Metadata: t.Metadata,
celType: cel.ObjectType(name),
traitMask: t.traitMask,
defaultValue: t.defaultValue,
MinSerializedSize: t.MinSerializedSize,
}
}
if t.IsMap() {
@ -507,42 +522,44 @@ type schemaTypeProvider struct {
var (
// AnyType is equivalent to the CEL 'protobuf.Any' type in that the value may have any of the
// types supported.
AnyType = newSimpleType("any", cel.AnyType, nil)
AnyType = newSimpleTypeWithMinSize("any", cel.AnyType, nil, 1)
// BoolType is equivalent to the CEL 'bool' type.
BoolType = newSimpleType("bool", cel.BoolType, types.False)
BoolType = newSimpleTypeWithMinSize("bool", cel.BoolType, types.False, minBoolSize)
// BytesType is equivalent to the CEL 'bytes' type.
BytesType = newSimpleType("bytes", cel.BytesType, types.Bytes([]byte{}))
BytesType = newSimpleTypeWithMinSize("bytes", cel.BytesType, types.Bytes([]byte{}), minStringSize)
// DoubleType is equivalent to the CEL 'double' type which is a 64-bit floating point value.
DoubleType = newSimpleType("double", cel.DoubleType, types.Double(0))
DoubleType = newSimpleTypeWithMinSize("double", cel.DoubleType, types.Double(0), minNumberSize)
// DurationType is equivalent to the CEL 'duration' type.
DurationType = newSimpleType("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)})
DurationType = newSimpleTypeWithMinSize("duration", cel.DurationType, types.Duration{Duration: time.Duration(0)}, minDurationSizeJSON)
// DateType is equivalent to the CEL 'date' type.
DateType = newSimpleType("date", cel.TimestampType, types.Timestamp{Time: time.Time{}})
DateType = newSimpleTypeWithMinSize("date", cel.TimestampType, types.Timestamp{Time: time.Time{}}, dateSizeJSON)
// DynType is the equivalent of the CEL 'dyn' concept which indicates that the type will be
// determined at runtime rather than compile time.
DynType = newSimpleType("dyn", cel.DynType, nil)
DynType = newSimpleTypeWithMinSize("dyn", cel.DynType, nil, 1)
// IntType is equivalent to the CEL 'int' type which is a 64-bit signed int.
IntType = newSimpleType("int", cel.IntType, types.IntZero)
IntType = newSimpleTypeWithMinSize("int", cel.IntType, types.IntZero, minNumberSize)
// NullType is equivalent to the CEL 'null_type'.
NullType = newSimpleType("null_type", cel.NullType, types.NullValue)
NullType = newSimpleTypeWithMinSize("null_type", cel.NullType, types.NullValue, 4)
// StringType is equivalent to the CEL 'string' type which is expected to be a UTF-8 string.
// StringType values may either be string literals or expression strings.
StringType = newSimpleType("string", cel.StringType, types.String(""))
StringType = newSimpleTypeWithMinSize("string", cel.StringType, types.String(""), minStringSize)
// TimestampType corresponds to the well-known protobuf.Timestamp type supported within CEL.
TimestampType = newSimpleType("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}})
// Note that both the OpenAPI date and date-time types map onto TimestampType, so not all types
// labeled as Timestamp will necessarily have the same MinSerializedSize.
TimestampType = newSimpleTypeWithMinSize("timestamp", cel.TimestampType, types.Timestamp{Time: time.Time{}}, dateSizeJSON)
// UintType is equivalent to the CEL 'uint' type.
UintType = newSimpleType("uint", cel.UintType, types.Uint(0))
UintType = newSimpleTypeWithMinSize("uint", cel.UintType, types.Uint(0), 1)
// ListType is equivalent to the CEL 'list' type.
ListType = NewListType(AnyType, noMaxLength)

View File

@ -766,4 +766,4 @@ func celBool(pred bool) ref.Val {
return types.False
}
var unknownType = &DeclType{name: "unknown"}
var unknownType = &DeclType{name: "unknown", MinSerializedSize: 1}