diff --git a/libs/core/langchain_core/utils/json_schema.py b/libs/core/langchain_core/utils/json_schema.py index 54c9a922d34..1baf93610b6 100644 --- a/libs/core/langchain_core/utils/json_schema.py +++ b/libs/core/langchain_core/utils/json_schema.py @@ -39,6 +39,31 @@ def _retrieve_ref(path: str, schema: dict) -> Union[list, dict]: return deepcopy(out) +def _process_dict_properties( + properties: dict[str, Any], + full_schema: dict[str, Any], + processed_refs: set[str], + skip_keys: Sequence[str], + *, + shallow_refs: bool, +) -> dict[str, Any]: + """Process dictionary properties, recursing into nested structures.""" + result: dict[str, Any] = {} + for key, value in properties.items(): + if key in skip_keys: + # Skip recursion for specified keys, just copy the value as-is + result[key] = deepcopy(value) + elif isinstance(value, (dict, list)): + # Recursively process nested objects and arrays + result[key] = _dereference_refs_helper( + value, full_schema, processed_refs, skip_keys, shallow_refs + ) + else: + # Copy primitive values directly + result[key] = value + return result + + def _dereference_refs_helper( obj: Any, full_schema: dict[str, Any], @@ -46,55 +71,87 @@ def _dereference_refs_helper( skip_keys: Sequence[str], shallow_refs: bool, # noqa: FBT001 ) -> Any: - """Inline every pure {'$ref':...}. + """Dereference JSON Schema $ref objects, handling both pure and mixed references. - But: + This function processes JSON Schema objects containing $ref properties by resolving + the references and merging any additional properties. It handles: - - if shallow_refs=True: only break cycles, do not inline nested refs - - if shallow_refs=False: deep-inline all nested refs + - Pure $ref objects: {"$ref": "#/path/to/definition"} + - Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...} + - Circular references by breaking cycles and preserving non-ref properties - Also skip recursion under any key in skip_keys. 
+ Args: + obj: The object to process (can be dict, list, or primitive) + full_schema: The complete schema containing all definitions + processed_refs: Set tracking currently processing refs (for cycle detection) + skip_keys: Keys under which to skip recursion + shallow_refs: If True, only break cycles; if False, deep-inline all refs Returns: - The object with refs dereferenced. + The object with $ref properties resolved and merged with other properties. """ if processed_refs is None: processed_refs = set() - # 1) Pure $ref node? - if isinstance(obj, dict) and "$ref" in set(obj.keys()): + # Case 1: Object contains a $ref property (pure or mixed with additional properties) + if isinstance(obj, dict) and "$ref" in obj: ref_path = obj["$ref"] - # cycle? + additional_properties = { + key: value for key, value in obj.items() if key != "$ref" + } + + # Detect circular reference: if we're already processing this $ref, + # return only the additional properties to break the cycle if ref_path in processed_refs: - return {} + return _process_dict_properties( + additional_properties, + full_schema, + processed_refs, + skip_keys, + shallow_refs=shallow_refs, + ) + + # Mark this reference as being processed (for cycle detection) processed_refs.add(ref_path) - # grab + copy the target - target = deepcopy(_retrieve_ref(ref_path, full_schema)) - - # deep inlining: recurse into everything - result = _dereference_refs_helper( - target, full_schema, processed_refs, skip_keys, shallow_refs + # Fetch and recursively resolve the referenced object + referenced_object = deepcopy(_retrieve_ref(ref_path, full_schema)) + resolved_reference = _dereference_refs_helper( + referenced_object, full_schema, processed_refs, skip_keys, shallow_refs ) + # Clean up: remove from processing set before returning processed_refs.remove(ref_path) - return result - # 2) Not a pure-$ref: recurse, skipping any keys in skip_keys + # Pure $ref case: no additional properties, return resolved reference directly + if 
not additional_properties: + return resolved_reference + + # Mixed $ref case: merge resolved reference with additional properties + # Additional properties take precedence over resolved properties + merged_result = {} + if isinstance(resolved_reference, dict): + merged_result.update(resolved_reference) + + # Process additional properties and merge them (they override resolved ones) + processed_additional = _process_dict_properties( + additional_properties, + full_schema, + processed_refs, + skip_keys, + shallow_refs=shallow_refs, + ) + merged_result.update(processed_additional) + + return merged_result + + # Case 2: Regular dictionary without $ref - process all properties if isinstance(obj, dict): - out: dict[str, Any] = {} - for k, v in obj.items(): - if k in skip_keys: - # do not recurse under this key - out[k] = deepcopy(v) - elif isinstance(v, (dict, list)): - out[k] = _dereference_refs_helper( - v, full_schema, processed_refs, skip_keys, shallow_refs - ) - else: - out[k] = v - return out + return _process_dict_properties( + obj, full_schema, processed_refs, skip_keys, shallow_refs=shallow_refs + ) + # Case 3: List - recursively process each item if isinstance(obj, list): return [ _dereference_refs_helper( @@ -103,6 +160,7 @@ def _dereference_refs_helper( for item in obj ] + # Case 4: Primitive value (string, number, boolean, null) - return unchanged return obj @@ -112,19 +170,67 @@ def dereference_refs( full_schema: Optional[dict] = None, skip_keys: Optional[Sequence[str]] = None, ) -> dict: - """Try to substitute $refs in JSON Schema. + """Resolve and inline JSON Schema $ref references in a schema object. + + This function processes a JSON Schema and resolves all $ref references by replacing + them with the actual referenced content. It handles both simple references and + complex cases like circular references and mixed $ref objects that contain + additional properties alongside the $ref. Args: - schema_obj: The fragment to dereference. 
- full_schema: The complete schema (defaults to schema_obj). - skip_keys: - - If None (the default), we skip recursion under '$defs' *and* only - shallow-inline refs. - - If provided (even as an empty list), we will recurse under every key and - deep-inline all refs. + schema_obj: The JSON Schema object or fragment to process. This can be a + complete schema or just a portion of one. + full_schema: The complete schema containing all definitions that $refs might + point to. If not provided, defaults to schema_obj (useful when the + schema is self-contained). + skip_keys: Controls recursion behavior and reference resolution depth: + - If None (default): Skip recursion under '$defs' and use shallow reference + resolution (break cycles but don't deep-inline nested refs) + - If provided (even as []): Skip just those keys and use deep reference + resolution (fully inline all nested references) Returns: - The schema with refs dereferenced. + A new dictionary with all $ref references resolved and inlined. The original + schema_obj is not modified. + + Examples: + Basic reference resolution: + >>> schema = { + ... "type": "object", + ... "properties": {"name": {"$ref": "#/$defs/string_type"}}, + ... "$defs": {"string_type": {"type": "string"}}, + ... } + >>> result = dereference_refs(schema) + >>> result["properties"]["name"] # {"type": "string"} + + Mixed $ref with additional properties: + >>> schema = { + ... "properties": { + ... "name": {"$ref": "#/$defs/base", "description": "User name"} + ... }, + ... "$defs": {"base": {"type": "string", "minLength": 1}}, + ... } + >>> result = dereference_refs(schema) + >>> result["properties"]["name"] + # {"type": "string", "minLength": 1, "description": "User name"} + + Handling circular references: + >>> schema = { + ... "properties": {"user": {"$ref": "#/$defs/User"}}, + ... "$defs": { + ... "User": { + ... "type": "object", + ... "properties": {"friend": {"$ref": "#/$defs/User"}}, + ... } + ... }, + ...
} + >>> result = dereference_refs(schema) # Won't cause infinite recursion + + Note: + - Circular references are handled gracefully by breaking cycles + - Mixed $ref objects (with both $ref and other properties) are supported + - Additional properties in mixed $refs override resolved properties + - The $defs section is preserved in the output by default """ full = full_schema or schema_obj keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"] diff --git a/libs/core/tests/unit_tests/utils/test_json_schema.py b/libs/core/tests/unit_tests/utils/test_json_schema.py index 994d62064f6..b618e0d4b52 100644 --- a/libs/core/tests/unit_tests/utils/test_json_schema.py +++ b/libs/core/tests/unit_tests/utils/test_json_schema.py @@ -444,3 +444,338 @@ def test_dereference_refs_list_index() -> None: actual_dict_key = dereference_refs(schema_dict_key) assert actual_dict_key == expected_dict_key + + +def test_dereference_refs_mixed_ref_with_properties() -> None: + """Test dereferencing refs that have $ref plus other properties.""" + # This pattern can cause infinite recursion if not handled correctly + schema = { + "type": "object", + "properties": { + "data": { + "$ref": "#/$defs/BaseType", + "description": "Additional description", + "example": "some example", + } + }, + "$defs": {"BaseType": {"type": "string", "minLength": 1}}, + } + + expected = { + "type": "object", + "properties": { + "data": { + "type": "string", + "minLength": 1, + "description": "Additional description", + "example": "some example", + } + }, + "$defs": {"BaseType": {"type": "string", "minLength": 1}}, + } + + actual = dereference_refs(schema) + assert actual == expected + + +def test_dereference_refs_complex_pattern() -> None: + """Test pattern that caused infinite recursion in MCP server schemas.""" + schema = { + "type": "object", + "properties": { + "query": {"$ref": "#/$defs/Query", "additionalProperties": False} + }, + "$defs": { + "Query": { + "type": "object", + "properties": {"user": 
{"$ref": "#/$defs/User"}}, + }, + "User": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "profile": {"$ref": "#/$defs/UserProfile", "nullable": True}, + }, + }, + "UserProfile": { + "type": "object", + "properties": {"bio": {"type": "string"}}, + }, + }, + } + + # This should not cause infinite recursion + actual = dereference_refs(schema) + + expected = { + "$defs": { + "Query": { + "properties": {"user": {"$ref": "#/$defs/User"}}, + "type": "object", + }, + "User": { + "properties": { + "id": {"type": "string"}, + "profile": {"$ref": "#/$defs/UserProfile", "nullable": True}, + }, + "type": "object", + }, + "UserProfile": { + "properties": {"bio": {"type": "string"}}, + "type": "object", + }, + }, + "properties": { + "query": { + "additionalProperties": False, + "properties": { + "user": { + "properties": { + "id": {"type": "string"}, + "profile": { + "nullable": True, + "properties": {"bio": {"type": "string"}}, + "type": "object", + }, + }, + "type": "object", + } + }, + "type": "object", + } + }, + "type": "object", + } + + assert actual == expected + + +def test_dereference_refs_cyclical_mixed_refs() -> None: + """Test cyclical references with mixed $ref properties don't cause loops.""" + schema = { + "type": "object", + "properties": {"node": {"$ref": "#/$defs/Node"}}, + "$defs": { + "Node": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "parent": {"$ref": "#/$defs/Node", "nullable": True}, + "children": {"type": "array", "items": {"$ref": "#/$defs/Node"}}, + }, + } + }, + } + + # This should handle cycles gracefully + actual = dereference_refs(schema) + + assert actual == { + "$defs": { + "Node": { + "properties": { + "children": {"items": {"$ref": "#/$defs/Node"}, "type": "array"}, + "id": {"type": "string"}, + "parent": {"$ref": "#/$defs/Node", "nullable": True}, + }, + "type": "object", + } + }, + "properties": { + "node": { + "properties": { + "children": {"items": {}, "type": "array"}, + "id": {"type": 
"string"}, + "parent": {"nullable": True}, + }, + "type": "object", + } + }, + "type": "object", + } + + +def test_dereference_refs_empty_mixed_ref() -> None: + """Test mixed $ref with empty other properties.""" + schema = { + "type": "object", + "properties": {"data": {"$ref": "#/$defs/Base"}}, + "$defs": {"Base": {"type": "string"}}, + } + + expected = { + "type": "object", + "properties": {"data": {"type": "string"}}, + "$defs": {"Base": {"type": "string"}}, + } + + actual = dereference_refs(schema) + assert actual == expected + + +def test_dereference_refs_nested_mixed_refs() -> None: + """Test nested objects with mixed $ref properties.""" + schema = { + "type": "object", + "properties": { + "outer": { + "type": "object", + "properties": { + "inner": {"$ref": "#/$defs/Base", "title": "Custom Title"} + }, + } + }, + "$defs": {"Base": {"type": "string", "minLength": 1}}, + } + + expected = { + "type": "object", + "properties": { + "outer": { + "type": "object", + "properties": { + "inner": {"type": "string", "minLength": 1, "title": "Custom Title"} + }, + } + }, + "$defs": {"Base": {"type": "string", "minLength": 1}}, + } + + actual = dereference_refs(schema) + assert actual == expected + + +def test_dereference_refs_array_with_mixed_refs() -> None: + """Test arrays containing mixed $ref objects.""" + schema = { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": {"$ref": "#/$defs/Item", "description": "An item"}, + } + }, + "$defs": {"Item": {"type": "string", "enum": ["a", "b", "c"]}}, + } + + expected = { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "type": "string", + "enum": ["a", "b", "c"], + "description": "An item", + }, + } + }, + "$defs": {"Item": {"type": "string", "enum": ["a", "b", "c"]}}, + } + + actual = dereference_refs(schema) + assert actual == expected + + +def test_dereference_refs_mixed_ref_overrides_property() -> None: + """Test that mixed $ref properties override resolved 
properties correctly.""" + schema = { + "type": "object", + "properties": { + "data": { + "$ref": "#/$defs/Base", + "type": "number", # Override the resolved type + "description": "Overridden description", + } + }, + "$defs": {"Base": {"type": "string", "description": "Original description"}}, + } + + expected = { + "type": "object", + "properties": { + "data": { + "type": "number", # Mixed property should override + # Mixed property should override + "description": "Overridden description", + } + }, + "$defs": {"Base": {"type": "string", "description": "Original description"}}, + } + + actual = dereference_refs(schema) + assert actual == expected + + +def test_dereference_refs_mixed_ref_cyclical_with_properties() -> None: + """Test cyclical mixed $refs preserve non-ref properties correctly.""" + schema = { + "type": "object", + "properties": {"root": {"$ref": "#/$defs/Node", "required": True}}, + "$defs": { + "Node": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "child": {"$ref": "#/$defs/Node", "nullable": True}, + }, + } + }, + } + + expected = { + "type": "object", + "properties": { + "root": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "child": {"nullable": True}, # Cycle broken but nullable preserved + }, + "required": True, # Mixed property preserved + } + }, + "$defs": { + "Node": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "child": {"$ref": "#/$defs/Node", "nullable": True}, + }, + } + }, + } + + actual = dereference_refs(schema) + assert actual == expected + + +def test_dereference_refs_non_dict_ref_target() -> None: + """Test $ref that resolves to non-dict values.""" + schema = { + "type": "object", + "properties": { + "simple_ref": {"$ref": "#/$defs/SimpleString"}, + "mixed_ref": { + "$ref": "#/$defs/SimpleString", + "description": "With description", + }, + }, + "$defs": { + "SimpleString": "string" # Non-dict definition + }, + } + + expected = { + "type": "object", + 
"properties": { + "simple_ref": "string", + "mixed_ref": { + "description": "With description" + }, # Can't merge with non-dict + }, + "$defs": {"SimpleString": "string"}, + } + + actual = dereference_refs(schema) + assert actual == expected