mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-14 05:56:40 +00:00
fix(core): resolve infinite recursion in _dereference_refs_helper with mixed $ref objects (#32578)
**Description:** Fixes infinite recursion issue in JSON schema
dereferencing when objects contain both $ref and other properties (e.g.,
nullable, description, additionalProperties). This was causing Apollo
MCP server schemas to hang indefinitely during tool binding.
**Problem:**
- Commit fb5da8384 changed the condition from `set(obj.keys()) == {"$ref"}` to `"$ref" in set(obj.keys())`
- This caused objects with $ref + other properties to be treated as pure
$ref nodes
- Result: other properties were lost and infinite recursion occurred
with complex schemas
**Solution:**
- Restore pure $ref detection for objects with only $ref key
- Add proper handling for mixed $ref objects that preserves all
properties
- Merge resolved reference content with other properties
- Maintain cycle detection to prevent infinite recursion
**Impact:**
- Fixes Apollo MCP server schema integration
- Resolves tool binding infinite recursion with complex GraphQL schemas
- Preserves backward compatibility with existing functionality
- No performance impact - actually improves handling of complex schemas
**Issue:** Fixes #32511
**Dependencies:** None
**Testing:**
- Added comprehensive unit tests covering mixed $ref scenarios
- All existing tests pass (1326 passed, 0 failed)
- Tested with realistic Apollo GraphQL schemas
- Stress tested with 100 iterations of complex schemas
**Verification:**
- ✅ `make format` - All files properly formatted
- ✅ `make lint` - All linting checks pass
- ✅ `make test` - All 1326 unit tests pass
- ✅ No breaking changes - full backwards compatibility maintained
---------
Co-authored-by: Marcus <marcus@Marcus-M4-MAX.local>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
@@ -39,6 +39,31 @@ def _retrieve_ref(path: str, schema: dict) -> Union[list, dict]:
|
||||
return deepcopy(out)
|
||||
|
||||
|
||||
def _process_dict_properties(
    properties: dict[str, Any],
    full_schema: dict[str, Any],
    processed_refs: set[str],
    skip_keys: Sequence[str],
    *,
    shallow_refs: bool,
) -> dict[str, Any]:
    """Return a new dict built by converting each entry of ``properties``.

    Args:
        properties: Mapping whose values are converted one by one.
        full_schema: Forwarded unchanged to ``_dereference_refs_helper``.
        processed_refs: Forwarded unchanged to ``_dereference_refs_helper``.
        skip_keys: Keys whose values are deep-copied instead of recursed into.
        shallow_refs: Forwarded unchanged to ``_dereference_refs_helper``.

    Returns:
        A dict with the same keys as ``properties`` and the converted values.
    """

    def _convert(name: str, item: Any) -> Any:
        # Values under a skipped key are copied verbatim, never recursed into.
        if name in skip_keys:
            return deepcopy(item)
        # Anything that is not a container is passed through as-is.
        if not isinstance(item, (dict, list)):
            return item
        # Dicts and lists are handed to the recursive helper.
        return _dereference_refs_helper(
            item, full_schema, processed_refs, skip_keys, shallow_refs
        )

    return {name: _convert(name, item) for name, item in properties.items()}
|
||||
|
||||
|
||||
def _dereference_refs_helper(
|
||||
obj: Any,
|
||||
full_schema: dict[str, Any],
|
||||
@@ -46,55 +71,87 @@ def _dereference_refs_helper(
|
||||
skip_keys: Sequence[str],
|
||||
shallow_refs: bool, # noqa: FBT001
|
||||
) -> Any:
|
||||
"""Inline every pure {'$ref':...}.
|
||||
"""Dereference JSON Schema $ref objects, handling both pure and mixed references.
|
||||
|
||||
But:
|
||||
This function processes JSON Schema objects containing $ref properties by resolving
|
||||
the references and merging any additional properties. It handles:
|
||||
|
||||
- if shallow_refs=True: only break cycles, do not inline nested refs
|
||||
- if shallow_refs=False: deep-inline all nested refs
|
||||
- Pure $ref objects: {"$ref": "#/path/to/definition"}
|
||||
- Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...}
|
||||
- Circular references by breaking cycles and preserving non-ref properties
|
||||
|
||||
Also skip recursion under any key in skip_keys.
|
||||
Args:
|
||||
obj: The object to process (can be dict, list, or primitive)
|
||||
full_schema: The complete schema containing all definitions
|
||||
processed_refs: Set tracking currently processing refs (for cycle detection)
|
||||
skip_keys: Keys under which to skip recursion
|
||||
shallow_refs: If True, only break cycles; if False, deep-inline all refs
|
||||
|
||||
Returns:
|
||||
The object with refs dereferenced.
|
||||
The object with $ref properties resolved and merged with other properties.
|
||||
"""
|
||||
if processed_refs is None:
|
||||
processed_refs = set()
|
||||
|
||||
# 1) Pure $ref node?
|
||||
if isinstance(obj, dict) and "$ref" in set(obj.keys()):
|
||||
# Case 1: Object contains a $ref property (pure or mixed with additional properties)
|
||||
if isinstance(obj, dict) and "$ref" in obj:
|
||||
ref_path = obj["$ref"]
|
||||
# cycle?
|
||||
additional_properties = {
|
||||
key: value for key, value in obj.items() if key != "$ref"
|
||||
}
|
||||
|
||||
# Detect circular reference: if we're already processing this $ref,
|
||||
# return only the additional properties to break the cycle
|
||||
if ref_path in processed_refs:
|
||||
return {}
|
||||
return _process_dict_properties(
|
||||
additional_properties,
|
||||
full_schema,
|
||||
processed_refs,
|
||||
skip_keys,
|
||||
shallow_refs=shallow_refs,
|
||||
)
|
||||
|
||||
# Mark this reference as being processed (for cycle detection)
|
||||
processed_refs.add(ref_path)
|
||||
|
||||
# grab + copy the target
|
||||
target = deepcopy(_retrieve_ref(ref_path, full_schema))
|
||||
|
||||
# deep inlining: recurse into everything
|
||||
result = _dereference_refs_helper(
|
||||
target, full_schema, processed_refs, skip_keys, shallow_refs
|
||||
# Fetch and recursively resolve the referenced object
|
||||
referenced_object = deepcopy(_retrieve_ref(ref_path, full_schema))
|
||||
resolved_reference = _dereference_refs_helper(
|
||||
referenced_object, full_schema, processed_refs, skip_keys, shallow_refs
|
||||
)
|
||||
|
||||
# Clean up: remove from processing set before returning
|
||||
processed_refs.remove(ref_path)
|
||||
return result
|
||||
|
||||
# 2) Not a pure-$ref: recurse, skipping any keys in skip_keys
|
||||
# Pure $ref case: no additional properties, return resolved reference directly
|
||||
if not additional_properties:
|
||||
return resolved_reference
|
||||
|
||||
# Mixed $ref case: merge resolved reference with additional properties
|
||||
# Additional properties take precedence over resolved properties
|
||||
merged_result = {}
|
||||
if isinstance(resolved_reference, dict):
|
||||
merged_result.update(resolved_reference)
|
||||
|
||||
# Process additional properties and merge them (they override resolved ones)
|
||||
processed_additional = _process_dict_properties(
|
||||
additional_properties,
|
||||
full_schema,
|
||||
processed_refs,
|
||||
skip_keys,
|
||||
shallow_refs=shallow_refs,
|
||||
)
|
||||
merged_result.update(processed_additional)
|
||||
|
||||
return merged_result
|
||||
|
||||
# Case 2: Regular dictionary without $ref - process all properties
|
||||
if isinstance(obj, dict):
|
||||
out: dict[str, Any] = {}
|
||||
for k, v in obj.items():
|
||||
if k in skip_keys:
|
||||
# do not recurse under this key
|
||||
out[k] = deepcopy(v)
|
||||
elif isinstance(v, (dict, list)):
|
||||
out[k] = _dereference_refs_helper(
|
||||
v, full_schema, processed_refs, skip_keys, shallow_refs
|
||||
)
|
||||
else:
|
||||
out[k] = v
|
||||
return out
|
||||
return _process_dict_properties(
|
||||
obj, full_schema, processed_refs, skip_keys, shallow_refs=shallow_refs
|
||||
)
|
||||
|
||||
# Case 3: List - recursively process each item
|
||||
if isinstance(obj, list):
|
||||
return [
|
||||
_dereference_refs_helper(
|
||||
@@ -103,6 +160,7 @@ def _dereference_refs_helper(
|
||||
for item in obj
|
||||
]
|
||||
|
||||
# Case 4: Primitive value (string, number, boolean, null) - return unchanged
|
||||
return obj
|
||||
|
||||
|
||||
@@ -112,19 +170,67 @@ def dereference_refs(
|
||||
full_schema: Optional[dict] = None,
|
||||
skip_keys: Optional[Sequence[str]] = None,
|
||||
) -> dict:
|
||||
"""Try to substitute $refs in JSON Schema.
|
||||
"""Resolve and inline JSON Schema $ref references in a schema object.
|
||||
|
||||
This function processes a JSON Schema and resolves all $ref references by replacing
|
||||
them with the actual referenced content. It handles both simple references and
|
||||
complex cases like circular references and mixed $ref objects that contain
|
||||
additional properties alongside the $ref.
|
||||
|
||||
Args:
|
||||
schema_obj: The fragment to dereference.
|
||||
full_schema: The complete schema (defaults to schema_obj).
|
||||
skip_keys:
|
||||
- If None (the default), we skip recursion under '$defs' *and* only
|
||||
shallow-inline refs.
|
||||
- If provided (even as an empty list), we will recurse under every key and
|
||||
deep-inline all refs.
|
||||
schema_obj: The JSON Schema object or fragment to process. This can be a
|
||||
complete schema or just a portion of one.
|
||||
full_schema: The complete schema containing all definitions that $refs might
|
||||
point to. If not provided, defaults to schema_obj (useful when the
|
||||
schema is self-contained).
|
||||
skip_keys: Controls recursion behavior and reference resolution depth:
|
||||
- If None (default): Only recurse under '$defs' and use shallow reference
|
||||
resolution (break cycles but don't deep-inline nested refs)
|
||||
- If provided (even as []): Recurse under all keys and use deep reference
|
||||
resolution (fully inline all nested references)
|
||||
|
||||
Returns:
|
||||
The schema with refs dereferenced.
|
||||
A new dictionary with all $ref references resolved and inlined. The original
|
||||
schema_obj is not modified.
|
||||
|
||||
Examples:
|
||||
Basic reference resolution:
|
||||
>>> schema = {
|
||||
... "type": "object",
|
||||
... "properties": {"name": {"$ref": "#/$defs/string_type"}},
|
||||
... "$defs": {"string_type": {"type": "string"}},
|
||||
... }
|
||||
>>> result = dereference_refs(schema)
|
||||
>>> result["properties"]["name"] # {"type": "string"}
|
||||
|
||||
Mixed $ref with additional properties:
|
||||
>>> schema = {
|
||||
... "properties": {
|
||||
... "name": {"$ref": "#/$defs/base", "description": "User name"}
|
||||
... },
|
||||
... "$defs": {"base": {"type": "string", "minLength": 1}},
|
||||
... }
|
||||
>>> result = dereference_refs(schema)
|
||||
>>> result["properties"]["name"]
|
||||
# {"type": "string", "minLength": 1, "description": "User name"}
|
||||
|
||||
Handling circular references:
|
||||
>>> schema = {
|
||||
... "properties": {"user": {"$ref": "#/$defs/User"}},
|
||||
... "$defs": {
|
||||
... "User": {
|
||||
... "type": "object",
|
||||
... "properties": {"friend": {"$ref": "#/$defs/User"}},
|
||||
... }
|
||||
... },
|
||||
... }
|
||||
>>> result = dereference_refs(schema) # Won't cause infinite recursion
|
||||
|
||||
Note:
|
||||
- Circular references are handled gracefully by breaking cycles
|
||||
- Mixed $ref objects (with both $ref and other properties) are supported
|
||||
- Additional properties in mixed $refs override resolved properties
|
||||
- The $defs section is preserved in the output by default
|
||||
"""
|
||||
full = full_schema or schema_obj
|
||||
keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]
|
||||
|
@@ -444,3 +444,338 @@ def test_dereference_refs_list_index() -> None:
|
||||
|
||||
actual_dict_key = dereference_refs(schema_dict_key)
|
||||
assert actual_dict_key == expected_dict_key
|
||||
|
||||
|
||||
def test_dereference_refs_mixed_ref_with_properties() -> None:
    """Check a node holding ``$ref`` plus sibling keys is inlined with siblings kept."""
    # A node shaped like this can trigger infinite recursion when mishandled.
    input_schema = {
        "type": "object",
        "properties": {
            "data": {
                "$ref": "#/$defs/BaseType",
                "description": "Additional description",
                "example": "some example",
            }
        },
        "$defs": {"BaseType": {"type": "string", "minLength": 1}},
    }

    resolved = dereference_refs(input_schema)

    # The referenced keys and the sibling keys end up side by side.
    inlined_data = {
        "type": "string",
        "minLength": 1,
        "description": "Additional description",
        "example": "some example",
    }
    assert resolved == {
        "type": "object",
        "properties": {"data": inlined_data},
        "$defs": {"BaseType": {"type": "string", "minLength": 1}},
    }
|
||||
|
||||
|
||||
def test_dereference_refs_complex_pattern() -> None:
    """Check a chain of pure and mixed refs (MCP-server style) resolves fully."""
    input_schema = {
        "type": "object",
        "properties": {
            "query": {"$ref": "#/$defs/Query", "additionalProperties": False}
        },
        "$defs": {
            "Query": {
                "type": "object",
                "properties": {"user": {"$ref": "#/$defs/User"}},
            },
            "User": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "profile": {"$ref": "#/$defs/UserProfile", "nullable": True},
                },
            },
            "UserProfile": {
                "type": "object",
                "properties": {"bio": {"type": "string"}},
            },
        },
    }

    # Must return rather than recurse forever.
    resolved = dereference_refs(input_schema)

    # Expected inlined tree, assembled from the innermost node outwards.
    inlined_profile = {
        "nullable": True,
        "properties": {"bio": {"type": "string"}},
        "type": "object",
    }
    inlined_user = {
        "properties": {"id": {"type": "string"}, "profile": inlined_profile},
        "type": "object",
    }
    inlined_query = {
        "additionalProperties": False,
        "properties": {"user": inlined_user},
        "type": "object",
    }
    # The definitions section is expected to keep its original refs.
    untouched_defs = {
        "Query": {
            "properties": {"user": {"$ref": "#/$defs/User"}},
            "type": "object",
        },
        "User": {
            "properties": {
                "id": {"type": "string"},
                "profile": {"$ref": "#/$defs/UserProfile", "nullable": True},
            },
            "type": "object",
        },
        "UserProfile": {
            "properties": {"bio": {"type": "string"}},
            "type": "object",
        },
    }

    assert resolved == {
        "$defs": untouched_defs,
        "properties": {"query": inlined_query},
        "type": "object",
    }
|
||||
|
||||
|
||||
def test_dereference_refs_cyclical_mixed_refs() -> None:
    """Check a self-referencing definition with mixed refs terminates."""
    input_schema = {
        "type": "object",
        "properties": {"node": {"$ref": "#/$defs/Node"}},
        "$defs": {
            "Node": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "parent": {"$ref": "#/$defs/Node", "nullable": True},
                    "children": {"type": "array", "items": {"$ref": "#/$defs/Node"}},
                },
            }
        },
    }

    # The cycle has to be cut instead of followed endlessly.
    resolved = dereference_refs(input_schema)

    # Inside the inlined node the back-references collapse: the pure ref
    # becomes an empty dict, the mixed ref keeps only its sibling key.
    inlined_node = {
        "properties": {
            "children": {"items": {}, "type": "array"},
            "id": {"type": "string"},
            "parent": {"nullable": True},
        },
        "type": "object",
    }
    untouched_defs = {
        "Node": {
            "properties": {
                "children": {"items": {"$ref": "#/$defs/Node"}, "type": "array"},
                "id": {"type": "string"},
                "parent": {"$ref": "#/$defs/Node", "nullable": True},
            },
            "type": "object",
        }
    }
    expected = {
        "$defs": untouched_defs,
        "properties": {"node": inlined_node},
        "type": "object",
    }
    assert resolved == expected
|
||||
|
||||
|
||||
def test_dereference_refs_empty_mixed_ref() -> None:
    """Check a ``$ref`` node carrying no sibling keys is replaced by its target."""
    input_schema = {
        "type": "object",
        "properties": {"data": {"$ref": "#/$defs/Base"}},
        "$defs": {"Base": {"type": "string"}},
    }

    resolved = dereference_refs(input_schema)

    assert resolved == {
        "type": "object",
        "properties": {"data": {"type": "string"}},
        "$defs": {"Base": {"type": "string"}},
    }
|
||||
|
||||
|
||||
def test_dereference_refs_nested_mixed_refs() -> None:
    """Check a mixed ``$ref`` buried inside a nested object is resolved."""
    input_schema = {
        "type": "object",
        "properties": {
            "outer": {
                "type": "object",
                "properties": {
                    "inner": {"$ref": "#/$defs/Base", "title": "Custom Title"}
                },
            }
        },
        "$defs": {"Base": {"type": "string", "minLength": 1}},
    }

    resolved = dereference_refs(input_schema)

    # The inner node gains the target's keys and keeps its own title.
    inlined_inner = {"type": "string", "minLength": 1, "title": "Custom Title"}
    expected = {
        "type": "object",
        "properties": {
            "outer": {"type": "object", "properties": {"inner": inlined_inner}}
        },
        "$defs": {"Base": {"type": "string", "minLength": 1}},
    }
    assert resolved == expected
|
||||
|
||||
|
||||
def test_dereference_refs_array_with_mixed_refs() -> None:
    """Check a mixed ``$ref`` used as an array's ``items`` schema is resolved."""
    input_schema = {
        "type": "object",
        "properties": {
            "items": {
                "type": "array",
                "items": {"$ref": "#/$defs/Item", "description": "An item"},
            }
        },
        "$defs": {"Item": {"type": "string", "enum": ["a", "b", "c"]}},
    }

    resolved = dereference_refs(input_schema)

    # The item schema is the referenced enum plus the sibling description.
    inlined_item = {
        "type": "string",
        "enum": ["a", "b", "c"],
        "description": "An item",
    }
    assert resolved == {
        "type": "object",
        "properties": {"items": {"type": "array", "items": inlined_item}},
        "$defs": {"Item": {"type": "string", "enum": ["a", "b", "c"]}},
    }
|
||||
|
||||
|
||||
def test_dereference_refs_mixed_ref_overrides_property() -> None:
    """Check sibling keys next to ``$ref`` win over same-named target keys."""
    input_schema = {
        "type": "object",
        "properties": {
            "data": {
                "$ref": "#/$defs/Base",
                # Both siblings below clash with keys of the referenced schema.
                "type": "number",
                "description": "Overridden description",
            }
        },
        "$defs": {"Base": {"type": "string", "description": "Original description"}},
    }

    resolved = dereference_refs(input_schema)

    # Only the sibling values survive for the clashing keys.
    overridden_data = {
        "type": "number",
        "description": "Overridden description",
    }
    expected = {
        "type": "object",
        "properties": {"data": overridden_data},
        "$defs": {"Base": {"type": "string", "description": "Original description"}},
    }
    assert resolved == expected
|
||||
|
||||
|
||||
def test_dereference_refs_mixed_ref_cyclical_with_properties() -> None:
    """Check sibling keys survive on mixed refs both at entry and at the cycle cut."""
    input_schema = {
        "type": "object",
        "properties": {"root": {"$ref": "#/$defs/Node", "required": True}},
        "$defs": {
            "Node": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "child": {"$ref": "#/$defs/Node", "nullable": True},
                },
            }
        },
    }

    resolved = dereference_refs(input_schema)

    inlined_root = {
        "type": "object",
        "properties": {
            "id": {"type": "string"},
            # The recursive ref is dropped here; its sibling key remains.
            "child": {"nullable": True},
        },
        # Sibling key from the outer mixed ref is carried over.
        "required": True,
    }
    assert resolved == {
        "type": "object",
        "properties": {"root": inlined_root},
        "$defs": {
            "Node": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "child": {"$ref": "#/$defs/Node", "nullable": True},
                },
            }
        },
    }
|
||||
|
||||
|
||||
def test_dereference_refs_non_dict_ref_target() -> None:
    """Check refs whose target is a plain string rather than a dict."""
    input_schema = {
        "type": "object",
        "properties": {
            "simple_ref": {"$ref": "#/$defs/SimpleString"},
            "mixed_ref": {
                "$ref": "#/$defs/SimpleString",
                "description": "With description",
            },
        },
        "$defs": {
            # The definition is a bare string, not a schema object.
            "SimpleString": "string"
        },
    }

    resolved = dereference_refs(input_schema)

    expected_properties = {
        # A pure ref is replaced by the string itself.
        "simple_ref": "string",
        # A string cannot be merged, so only the sibling key is left.
        "mixed_ref": {"description": "With description"},
    }
    assert resolved == {
        "type": "object",
        "properties": expected_properties,
        "$defs": {"SimpleString": "string"},
    }
|
||||
|
Reference in New Issue
Block a user