fix(core): resolve infinite recursion in _dereference_refs_helper with mixed $ref objects (#32578)

**Description:** Fixes infinite recursion issue in JSON schema
dereferencing when objects contain both $ref and other properties (e.g.,
nullable, description, additionalProperties). This was causing Apollo
MCP server schemas to hang indefinitely during tool binding.

**Problem:**
- Commit fb5da8384 changed the condition from `set(obj.keys()) ==
{"$ref"}` to `"$ref" in set(obj.keys())`
- This caused objects with $ref + other properties to be treated as pure
$ref nodes
- Result: other properties were lost and infinite recursion occurred
with complex schemas

**Solution:**
- Restore pure $ref detection for objects with only $ref key  
- Add proper handling for mixed $ref objects that preserves all
properties
- Merge resolved reference content with other properties
- Maintain cycle detection to prevent infinite recursion

**Impact:**
- Fixes Apollo MCP server schema integration
- Resolves tool binding infinite recursion with complex GraphQL schemas
- Preserves backward compatibility with existing functionality
- No performance impact - actually improves handling of complex schemas

**Issue:** Fixes #32511

**Dependencies:** None

**Testing:**
- Added comprehensive unit tests covering mixed $ref scenarios
- All existing tests pass (1326 passed, 0 failed)
- Tested with realistic Apollo GraphQL schemas
- Stress tested with 100 iterations of complex schemas

**Verification:**
-  `make format` - All files properly formatted
-  `make lint` - All linting checks pass  
-  `make test` - All 1326 unit tests pass
-  No breaking changes - full backwards compatibility maintained

---------

Co-authored-by: Marcus <marcus@Marcus-M4-MAX.local>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
Marcus Chia
2025-09-12 03:21:31 +08:00
committed by GitHub
parent 255ad31955
commit c68796579e
2 changed files with 480 additions and 39 deletions

View File

@@ -39,6 +39,31 @@ def _retrieve_ref(path: str, schema: dict) -> Union[list, dict]:
return deepcopy(out)
def _process_dict_properties(
    properties: dict[str, Any],
    full_schema: dict[str, Any],
    processed_refs: set[str],
    skip_keys: Sequence[str],
    *,
    shallow_refs: bool,
) -> dict[str, Any]:
    """Build a new dict from ``properties``, dereferencing nested containers.

    Args:
        properties: Mapping whose values are processed one by one.
        full_schema: Schema passed through to ``_dereference_refs_helper``.
        processed_refs: Ref-tracking set passed through to the helper.
        skip_keys: Keys whose values are deep-copied verbatim instead of being
            recursed into.
        shallow_refs: Flag forwarded unchanged to the helper.

    Returns:
        A new dict with the same keys, in the same order, as ``properties``.
    """

    def _convert(name: str, item: Any) -> Any:
        if name in skip_keys:
            # Skipped keys keep their content untouched, but as an independent copy.
            return deepcopy(item)
        if isinstance(item, (dict, list)):
            # Containers may hold further $ref nodes, so hand them to the helper.
            return _dereference_refs_helper(
                item, full_schema, processed_refs, skip_keys, shallow_refs
            )
        # Anything that is neither a dict nor a list is stored as-is.
        return item

    return {name: _convert(name, item) for name, item in properties.items()}
def _dereference_refs_helper(
obj: Any,
full_schema: dict[str, Any],
@@ -46,55 +71,87 @@ def _dereference_refs_helper(
skip_keys: Sequence[str],
shallow_refs: bool, # noqa: FBT001
) -> Any:
"""Inline every pure {'$ref':...}.
"""Dereference JSON Schema $ref objects, handling both pure and mixed references.
But:
This function processes JSON Schema objects containing $ref properties by resolving
the references and merging any additional properties. It handles:
- if shallow_refs=True: only break cycles, do not inline nested refs
- if shallow_refs=False: deep-inline all nested refs
- Pure $ref objects: {"$ref": "#/path/to/definition"}
- Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...}
- Circular references by breaking cycles and preserving non-ref properties
Also skip recursion under any key in skip_keys.
Args:
obj: The object to process (can be dict, list, or primitive)
full_schema: The complete schema containing all definitions
processed_refs: Set tracking currently processing refs (for cycle detection)
skip_keys: Keys under which to skip recursion
shallow_refs: If True, only break cycles; if False, deep-inline all refs
Returns:
The object with refs dereferenced.
The object with $ref properties resolved and merged with other properties.
"""
if processed_refs is None:
processed_refs = set()
# 1) Pure $ref node?
if isinstance(obj, dict) and "$ref" in set(obj.keys()):
# Case 1: Object contains a $ref property (pure or mixed with additional properties)
if isinstance(obj, dict) and "$ref" in obj:
ref_path = obj["$ref"]
# cycle?
additional_properties = {
key: value for key, value in obj.items() if key != "$ref"
}
# Detect circular reference: if we're already processing this $ref,
# return only the additional properties to break the cycle
if ref_path in processed_refs:
return {}
return _process_dict_properties(
additional_properties,
full_schema,
processed_refs,
skip_keys,
shallow_refs=shallow_refs,
)
# Mark this reference as being processed (for cycle detection)
processed_refs.add(ref_path)
# grab + copy the target
target = deepcopy(_retrieve_ref(ref_path, full_schema))
# deep inlining: recurse into everything
result = _dereference_refs_helper(
target, full_schema, processed_refs, skip_keys, shallow_refs
# Fetch and recursively resolve the referenced object
referenced_object = deepcopy(_retrieve_ref(ref_path, full_schema))
resolved_reference = _dereference_refs_helper(
referenced_object, full_schema, processed_refs, skip_keys, shallow_refs
)
# Clean up: remove from processing set before returning
processed_refs.remove(ref_path)
return result
# 2) Not a pure-$ref: recurse, skipping any keys in skip_keys
# Pure $ref case: no additional properties, return resolved reference directly
if not additional_properties:
return resolved_reference
# Mixed $ref case: merge resolved reference with additional properties
# Additional properties take precedence over resolved properties
merged_result = {}
if isinstance(resolved_reference, dict):
merged_result.update(resolved_reference)
# Process additional properties and merge them (they override resolved ones)
processed_additional = _process_dict_properties(
additional_properties,
full_schema,
processed_refs,
skip_keys,
shallow_refs=shallow_refs,
)
merged_result.update(processed_additional)
return merged_result
# Case 2: Regular dictionary without $ref - process all properties
if isinstance(obj, dict):
out: dict[str, Any] = {}
for k, v in obj.items():
if k in skip_keys:
# do not recurse under this key
out[k] = deepcopy(v)
elif isinstance(v, (dict, list)):
out[k] = _dereference_refs_helper(
v, full_schema, processed_refs, skip_keys, shallow_refs
)
else:
out[k] = v
return out
return _process_dict_properties(
obj, full_schema, processed_refs, skip_keys, shallow_refs=shallow_refs
)
# Case 3: List - recursively process each item
if isinstance(obj, list):
return [
_dereference_refs_helper(
@@ -103,6 +160,7 @@ def _dereference_refs_helper(
for item in obj
]
# Case 4: Primitive value (string, number, boolean, null) - return unchanged
return obj
@@ -112,19 +170,67 @@ def dereference_refs(
full_schema: Optional[dict] = None,
skip_keys: Optional[Sequence[str]] = None,
) -> dict:
"""Try to substitute $refs in JSON Schema.
"""Resolve and inline JSON Schema $ref references in a schema object.
This function processes a JSON Schema and resolves all $ref references by replacing
them with the actual referenced content. It handles both simple references and
complex cases like circular references and mixed $ref objects that contain
additional properties alongside the $ref.
Args:
schema_obj: The fragment to dereference.
full_schema: The complete schema (defaults to schema_obj).
skip_keys:
- If None (the default), we skip recursion under '$defs' *and* only
shallow-inline refs.
- If provided (even as an empty list), we will recurse under every key and
deep-inline all refs.
schema_obj: The JSON Schema object or fragment to process. This can be a
complete schema or just a portion of one.
full_schema: The complete schema containing all definitions that $refs might
point to. If not provided, defaults to schema_obj (useful when the
schema is self-contained).
skip_keys: Controls recursion behavior and reference resolution depth:
- If None (default): Skip recursion under '$defs' and use shallow reference
resolution (break cycles but don't deep-inline nested refs)
- If provided (even as []): Skip recursion only under the given keys (with [],
recurse under every key) and use deep reference resolution (fully inline
all nested references)
Returns:
The schema with refs dereferenced.
A new dictionary with all $ref references resolved and inlined. The original
schema_obj is not modified.
Examples:
Basic reference resolution:
>>> schema = {
... "type": "object",
... "properties": {"name": {"$ref": "#/$defs/string_type"}},
... "$defs": {"string_type": {"type": "string"}},
... }
>>> result = dereference_refs(schema)
>>> result["properties"]["name"] # {"type": "string"}
Mixed $ref with additional properties:
>>> schema = {
... "properties": {
... "name": {"$ref": "#/$defs/base", "description": "User name"}
... },
... "$defs": {"base": {"type": "string", "minLength": 1}},
... }
>>> result = dereference_refs(schema)
>>> result["properties"]["name"]
# {"type": "string", "minLength": 1, "description": "User name"}
Handling circular references:
>>> schema = {
... "properties": {"user": {"$ref": "#/$defs/User"}},
... "$defs": {
... "User": {
... "type": "object",
... "properties": {"friend": {"$ref": "#/$defs/User"}},
... }
... },
... }
>>> result = dereference_refs(schema) # Won't cause infinite recursion
Note:
- Circular references are handled gracefully by breaking cycles
- Mixed $ref objects (with both $ref and other properties) are supported
- Additional properties in mixed $refs override resolved properties
- The $defs section is preserved in the output by default
"""
full = full_schema or schema_obj
keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]

View File

@@ -444,3 +444,338 @@ def test_dereference_refs_list_index() -> None:
actual_dict_key = dereference_refs(schema_dict_key)
assert actual_dict_key == expected_dict_key
def test_dereference_refs_mixed_ref_with_properties() -> None:
    """A ``$ref`` node with sibling keys is inlined and keeps those siblings."""
    # Extra keys next to ``$ref`` are the shape that can cause infinite
    # recursion when handled incorrectly.
    mixed_node = {
        "$ref": "#/$defs/BaseType",
        "description": "Additional description",
        "example": "some example",
    }
    input_schema = {
        "type": "object",
        "properties": {"data": mixed_node},
        "$defs": {"BaseType": {"type": "string", "minLength": 1}},
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "type": "object",
        "properties": {
            "data": {
                "type": "string",
                "minLength": 1,
                "description": "Additional description",
                "example": "some example",
            }
        },
        "$defs": {"BaseType": {"type": "string", "minLength": 1}},
    }
def test_dereference_refs_complex_pattern() -> None:
    """Chained refs carrying sibling keys (MCP-server style) resolve fully."""
    input_schema = {
        "type": "object",
        "properties": {
            "query": {"$ref": "#/$defs/Query", "additionalProperties": False}
        },
        "$defs": {
            "Query": {
                "type": "object",
                "properties": {"user": {"$ref": "#/$defs/User"}},
            },
            "User": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "profile": {"$ref": "#/$defs/UserProfile", "nullable": True},
                },
            },
            "UserProfile": {
                "type": "object",
                "properties": {"bio": {"type": "string"}},
            },
        },
    }

    # The definitions section is expected to come back exactly as it went in.
    expected_defs = {
        "Query": {
            "properties": {"user": {"$ref": "#/$defs/User"}},
            "type": "object",
        },
        "User": {
            "properties": {
                "id": {"type": "string"},
                "profile": {"$ref": "#/$defs/UserProfile", "nullable": True},
            },
            "type": "object",
        },
        "UserProfile": {
            "properties": {"bio": {"type": "string"}},
            "type": "object",
        },
    }
    # Expected inlined tree, assembled from the innermost reference outwards.
    expected_profile = {
        "nullable": True,
        "properties": {"bio": {"type": "string"}},
        "type": "object",
    }
    expected_user = {
        "properties": {"id": {"type": "string"}, "profile": expected_profile},
        "type": "object",
    }
    expected_query = {
        "additionalProperties": False,
        "properties": {"user": expected_user},
        "type": "object",
    }

    # The call itself must terminate rather than recurse forever.
    resolved = dereference_refs(input_schema)

    assert resolved == {
        "$defs": expected_defs,
        "properties": {"query": expected_query},
        "type": "object",
    }
def test_dereference_refs_cyclical_mixed_refs() -> None:
    """Self-referencing definitions, pure and mixed, are cut instead of looping."""
    input_schema = {
        "type": "object",
        "properties": {"node": {"$ref": "#/$defs/Node"}},
        "$defs": {
            "Node": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "parent": {"$ref": "#/$defs/Node", "nullable": True},
                    "children": {"type": "array", "items": {"$ref": "#/$defs/Node"}},
                },
            }
        },
    }

    expected_defs = {
        "Node": {
            "properties": {
                "children": {"items": {"$ref": "#/$defs/Node"}, "type": "array"},
                "id": {"type": "string"},
                "parent": {"$ref": "#/$defs/Node", "nullable": True},
            },
            "type": "object",
        }
    }
    # Inside the inlined node, each back-reference to Node collapses to
    # whatever sibling keys it carried (nothing at all for the pure ref).
    expected_node = {
        "properties": {
            "children": {"items": {}, "type": "array"},
            "id": {"type": "string"},
            "parent": {"nullable": True},
        },
        "type": "object",
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "$defs": expected_defs,
        "properties": {"node": expected_node},
        "type": "object",
    }
def test_dereference_refs_empty_mixed_ref() -> None:
    """A ``$ref`` node with no sibling keys is replaced by its target."""
    input_schema = {
        "type": "object",
        "properties": {"data": {"$ref": "#/$defs/Base"}},
        "$defs": {"Base": {"type": "string"}},
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "type": "object",
        "properties": {"data": {"type": "string"}},
        "$defs": {"Base": {"type": "string"}},
    }
def test_dereference_refs_nested_mixed_refs() -> None:
    """A mixed ``$ref`` buried inside a nested object is still resolved."""
    inner_with_ref = {"$ref": "#/$defs/Base", "title": "Custom Title"}
    input_schema = {
        "type": "object",
        "properties": {
            "outer": {
                "type": "object",
                "properties": {"inner": inner_with_ref},
            }
        },
        "$defs": {"Base": {"type": "string", "minLength": 1}},
    }

    resolved = dereference_refs(input_schema)

    inner_resolved = {"type": "string", "minLength": 1, "title": "Custom Title"}
    assert resolved == {
        "type": "object",
        "properties": {
            "outer": {
                "type": "object",
                "properties": {"inner": inner_resolved},
            }
        },
        "$defs": {"Base": {"type": "string", "minLength": 1}},
    }
def test_dereference_refs_array_with_mixed_refs() -> None:
    """A mixed ``$ref`` used as an array's ``items`` schema is resolved."""
    input_schema = {
        "type": "object",
        "properties": {
            "items": {
                "type": "array",
                "items": {"$ref": "#/$defs/Item", "description": "An item"},
            }
        },
        "$defs": {"Item": {"type": "string", "enum": ["a", "b", "c"]}},
    }

    resolved = dereference_refs(input_schema)

    resolved_item = {
        "type": "string",
        "enum": ["a", "b", "c"],
        "description": "An item",
    }
    assert resolved == {
        "type": "object",
        "properties": {"items": {"type": "array", "items": resolved_item}},
        "$defs": {"Item": {"type": "string", "enum": ["a", "b", "c"]}},
    }
def test_dereference_refs_mixed_ref_overrides_property() -> None:
    """Keys written next to ``$ref`` win over the same keys in the target."""
    input_schema = {
        "type": "object",
        "properties": {
            "data": {
                "$ref": "#/$defs/Base",
                # Both keys below also exist on the referenced definition.
                "type": "number",
                "description": "Overridden description",
            }
        },
        "$defs": {"Base": {"type": "string", "description": "Original description"}},
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "type": "object",
        "properties": {
            # The sibling values replace the ones from the definition.
            "data": {"type": "number", "description": "Overridden description"}
        },
        "$defs": {"Base": {"type": "string", "description": "Original description"}},
    }
def test_dereference_refs_mixed_ref_cyclical_with_properties() -> None:
    """Breaking a cycle at a mixed ``$ref`` keeps that node's sibling keys."""
    input_schema = {
        "type": "object",
        "properties": {"root": {"$ref": "#/$defs/Node", "required": True}},
        "$defs": {
            "Node": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "child": {"$ref": "#/$defs/Node", "nullable": True},
                },
            }
        },
    }

    expected_root = {
        "type": "object",
        "properties": {
            "id": {"type": "string"},
            # The recursive ref is cut here; only its sibling key remains.
            "child": {"nullable": True},
        },
        # Sibling key from the outer mixed ref.
        "required": True,
    }
    expected_defs = {
        "Node": {
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "child": {"$ref": "#/$defs/Node", "nullable": True},
            },
        }
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "type": "object",
        "properties": {"root": expected_root},
        "$defs": expected_defs,
    }
def test_dereference_refs_non_dict_ref_target() -> None:
    """Refs pointing at a non-dict definition, with and without sibling keys."""
    input_schema = {
        "type": "object",
        "properties": {
            "simple_ref": {"$ref": "#/$defs/SimpleString"},
            "mixed_ref": {
                "$ref": "#/$defs/SimpleString",
                "description": "With description",
            },
        },
        # The definition is a bare string rather than a schema object.
        "$defs": {"SimpleString": "string"},
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "type": "object",
        "properties": {
            # A pure ref is replaced by the raw target value.
            "simple_ref": "string",
            # A non-dict target cannot be merged, so only the sibling keys remain.
            "mixed_ref": {"description": "With description"},
        },
        "$defs": {"SimpleString": "string"},
    }