mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-07 14:03:26 +00:00
fix(core): JSON Schema reference resolution for list indices (#32088)
Fixes #32042 ## Summary Fixes a critical bug in JSON Schema reference resolution that prevented correctly dereferencing numeric components in JSON pointer paths, specifically for list indices in `anyOf`, `oneOf`, and `allOf` arrays. ## Changes - Fixed `_retrieve_ref` function in `libs/core/langchain_core/utils/json_schema.py` to properly handle numeric components - Added comprehensive test function `test_dereference_refs_list_index()` in `libs/core/tests/unit_tests/utils/test_json_schema.py` - Resolved line length formatting issues - Improved type checking and index validation for list and dictionary references ## Key Improvements - Correctly handles list index references in JSON pointer paths - Maintains backward compatibility with existing dictionary numeric key functionality - Adds robust error handling for out-of-bounds and invalid indices - Passes all test cases covering various reference scenarios ## Test Coverage - Verified fix for `#/properties/payload/anyOf/1/properties/startDate` reference - Tested edge cases including out-of-bounds and negative indices - Ensured no regression in existing reference resolution functionality Resolves the reported issue with JSON Schema reference dereferencing for list indices. --------- Co-authored-by: open-swe-dev[bot] <open-swe-dev@users.noreply.github.com> Co-authored-by: Mason Daugherty <github@mdrxy.com> Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
@@ -21,8 +21,15 @@ def _retrieve_ref(path: str, schema: dict) -> dict:
|
|||||||
for component in components[1:]:
|
for component in components[1:]:
|
||||||
if component in out:
|
if component in out:
|
||||||
out = out[component]
|
out = out[component]
|
||||||
elif component.isdigit() and int(component) in out:
|
elif component.isdigit():
|
||||||
out = out[int(component)]
|
index = int(component)
|
||||||
|
if (isinstance(out, list) and 0 <= index < len(out)) or (
|
||||||
|
isinstance(out, dict) and index in out
|
||||||
|
):
|
||||||
|
out = out[index]
|
||||||
|
else:
|
||||||
|
msg = f"Reference '{path}' not found."
|
||||||
|
raise KeyError(msg)
|
||||||
else:
|
else:
|
||||||
msg = f"Reference '{path}' not found."
|
msg = f"Reference '{path}' not found."
|
||||||
raise KeyError(msg)
|
raise KeyError(msg)
|
||||||
@@ -32,66 +39,66 @@ def _retrieve_ref(path: str, schema: dict) -> dict:
|
|||||||
def _dereference_refs_helper(
|
def _dereference_refs_helper(
|
||||||
obj: Any,
|
obj: Any,
|
||||||
full_schema: dict[str, Any],
|
full_schema: dict[str, Any],
|
||||||
|
processed_refs: Optional[set[str]],
|
||||||
skip_keys: Sequence[str],
|
skip_keys: Sequence[str],
|
||||||
processed_refs: Optional[set[str]] = None,
|
shallow_refs: bool, # noqa: FBT001
|
||||||
) -> Any:
|
) -> Any:
|
||||||
|
"""Inline every pure {'$ref':...}.
|
||||||
|
|
||||||
|
But:
|
||||||
|
- if shallow_refs=True: only break cycles, do not inline nested refs
|
||||||
|
- if shallow_refs=False: deep-inline all nested refs
|
||||||
|
|
||||||
|
Also skip recursion under any key in skip_keys.
|
||||||
|
"""
|
||||||
if processed_refs is None:
|
if processed_refs is None:
|
||||||
processed_refs = set()
|
processed_refs = set()
|
||||||
|
|
||||||
|
# 1) Pure $ref node?
|
||||||
|
if isinstance(obj, dict) and set(obj.keys()) == {"$ref"}:
|
||||||
|
ref_path = obj["$ref"]
|
||||||
|
# cycle?
|
||||||
|
if ref_path in processed_refs:
|
||||||
|
return {}
|
||||||
|
processed_refs.add(ref_path)
|
||||||
|
|
||||||
|
# grab + copy the target
|
||||||
|
target = deepcopy(_retrieve_ref(ref_path, full_schema))
|
||||||
|
|
||||||
|
# deep inlining: recurse into everything
|
||||||
|
result = _dereference_refs_helper(
|
||||||
|
target, full_schema, processed_refs, skip_keys, shallow_refs
|
||||||
|
)
|
||||||
|
|
||||||
|
processed_refs.remove(ref_path)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# 2) Not a pure-$ref: recurse, skipping any keys in skip_keys
|
||||||
if isinstance(obj, dict):
|
if isinstance(obj, dict):
|
||||||
obj_out = {}
|
out: dict[str, Any] = {}
|
||||||
for k, v in obj.items():
|
for k, v in obj.items():
|
||||||
if k in skip_keys:
|
if k in skip_keys:
|
||||||
obj_out[k] = v
|
# do not recurse under this key
|
||||||
elif k == "$ref":
|
out[k] = deepcopy(v)
|
||||||
if v in processed_refs:
|
elif isinstance(v, (dict, list)):
|
||||||
continue
|
out[k] = _dereference_refs_helper(
|
||||||
processed_refs.add(v)
|
v, full_schema, processed_refs, skip_keys, shallow_refs
|
||||||
ref = _retrieve_ref(v, full_schema)
|
|
||||||
full_ref = _dereference_refs_helper(
|
|
||||||
ref, full_schema, skip_keys, processed_refs
|
|
||||||
)
|
|
||||||
processed_refs.remove(v)
|
|
||||||
return full_ref
|
|
||||||
elif isinstance(v, (list, dict)):
|
|
||||||
obj_out[k] = _dereference_refs_helper(
|
|
||||||
v, full_schema, skip_keys, processed_refs
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
obj_out[k] = v
|
out[k] = v
|
||||||
return obj_out
|
return out
|
||||||
|
|
||||||
if isinstance(obj, list):
|
if isinstance(obj, list):
|
||||||
return [
|
return [
|
||||||
_dereference_refs_helper(el, full_schema, skip_keys, processed_refs)
|
_dereference_refs_helper(
|
||||||
for el in obj
|
item, full_schema, processed_refs, skip_keys, shallow_refs
|
||||||
|
)
|
||||||
|
for item in obj
|
||||||
]
|
]
|
||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
def _infer_skip_keys(
|
|
||||||
obj: Any, full_schema: dict, processed_refs: Optional[set[str]] = None
|
|
||||||
) -> list[str]:
|
|
||||||
if processed_refs is None:
|
|
||||||
processed_refs = set()
|
|
||||||
|
|
||||||
keys = []
|
|
||||||
if isinstance(obj, dict):
|
|
||||||
for k, v in obj.items():
|
|
||||||
if k == "$ref":
|
|
||||||
if v in processed_refs:
|
|
||||||
continue
|
|
||||||
processed_refs.add(v)
|
|
||||||
ref = _retrieve_ref(v, full_schema)
|
|
||||||
keys.append(v.split("/")[1])
|
|
||||||
keys += _infer_skip_keys(ref, full_schema, processed_refs)
|
|
||||||
elif isinstance(v, (list, dict)):
|
|
||||||
keys += _infer_skip_keys(v, full_schema, processed_refs)
|
|
||||||
elif isinstance(obj, list):
|
|
||||||
for el in obj:
|
|
||||||
keys += _infer_skip_keys(el, full_schema, processed_refs)
|
|
||||||
return keys
|
|
||||||
|
|
||||||
|
|
||||||
def dereference_refs(
|
def dereference_refs(
|
||||||
schema_obj: dict,
|
schema_obj: dict,
|
||||||
*,
|
*,
|
||||||
@@ -101,17 +108,15 @@ def dereference_refs(
|
|||||||
"""Try to substitute $refs in JSON Schema.
|
"""Try to substitute $refs in JSON Schema.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
schema_obj: The schema object to dereference.
|
schema_obj: The fragment to dereference.
|
||||||
full_schema: The full schema object. Defaults to None.
|
full_schema: The complete schema (defaults to schema_obj).
|
||||||
skip_keys: The keys to skip. Defaults to None.
|
skip_keys:
|
||||||
|
- If None (the default), we skip recursion under '$defs' *and* only
|
||||||
Returns:
|
shallow-inline refs.
|
||||||
The dereferenced schema object.
|
- If provided (even as an empty list), we will recurse under every key and
|
||||||
|
deep-inline all refs.
|
||||||
"""
|
"""
|
||||||
full_schema = full_schema or schema_obj
|
full = full_schema or schema_obj
|
||||||
skip_keys = (
|
keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]
|
||||||
skip_keys
|
shallow = skip_keys is None
|
||||||
if skip_keys is not None
|
return _dereference_refs_helper(schema_obj, full, None, keys_to_skip, shallow)
|
||||||
else _infer_skip_keys(schema_obj, full_schema)
|
|
||||||
)
|
|
||||||
return _dereference_refs_helper(schema_obj, full_schema, skip_keys)
|
|
||||||
|
@@ -264,3 +264,183 @@ def test_dereference_refs_cyclical_refs() -> None:
|
|||||||
}
|
}
|
||||||
actual = dereference_refs(schema)
|
actual = dereference_refs(schema)
|
||||||
assert actual == expected
|
assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_dereference_refs_list_index() -> None:
|
||||||
|
"""Test dereferencing refs that use list indices (e.g., anyOf/1)."""
|
||||||
|
# Test case from the issue report - anyOf array with numeric index reference
|
||||||
|
schema = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"payload": {
|
||||||
|
"anyOf": [
|
||||||
|
{ # variant 0
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"kind": {"type": "string", "const": "ONE"}},
|
||||||
|
},
|
||||||
|
{ # variant 1
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"kind": {"type": "string", "const": "TWO"},
|
||||||
|
"startDate": {
|
||||||
|
"type": "string",
|
||||||
|
"pattern": r"^\d{4}-\d{2}-\d{2}$",
|
||||||
|
},
|
||||||
|
"endDate": {
|
||||||
|
"$ref": (
|
||||||
|
"#/properties/payload/anyOf/1/properties/startDate"
|
||||||
|
)
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
expected = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"payload": {
|
||||||
|
"anyOf": [
|
||||||
|
{ # variant 0
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"kind": {"type": "string", "const": "ONE"}},
|
||||||
|
},
|
||||||
|
{ # variant 1
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"kind": {"type": "string", "const": "TWO"},
|
||||||
|
"startDate": {
|
||||||
|
"type": "string",
|
||||||
|
"pattern": r"^\d{4}-\d{2}-\d{2}$",
|
||||||
|
},
|
||||||
|
"endDate": {
|
||||||
|
"type": "string",
|
||||||
|
"pattern": r"^\d{4}-\d{2}-\d{2}$",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
actual = dereference_refs(schema)
|
||||||
|
assert actual == expected
|
||||||
|
|
||||||
|
# Test oneOf array with numeric index reference
|
||||||
|
schema_oneof = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"oneOf": [
|
||||||
|
{"type": "string"},
|
||||||
|
{"type": "number"},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"value": {"$ref": "#/properties/data/oneOf/1"}},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
expected_oneof = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"oneOf": [
|
||||||
|
{"type": "string"},
|
||||||
|
{"type": "number"},
|
||||||
|
{"type": "object", "properties": {"value": {"type": "number"}}},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
actual_oneof = dereference_refs(schema_oneof)
|
||||||
|
assert actual_oneof == expected_oneof
|
||||||
|
|
||||||
|
# Test allOf array with numeric index reference
|
||||||
|
schema_allof = {
|
||||||
|
"type": "object",
|
||||||
|
"allOf": [
|
||||||
|
{"properties": {"name": {"type": "string"}}},
|
||||||
|
{"properties": {"age": {"type": "number"}}},
|
||||||
|
],
|
||||||
|
"properties": {"copy_name": {"$ref": "#/allOf/0/properties/name"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
expected_allof = {
|
||||||
|
"type": "object",
|
||||||
|
"allOf": [
|
||||||
|
{"properties": {"name": {"type": "string"}}},
|
||||||
|
{"properties": {"age": {"type": "number"}}},
|
||||||
|
],
|
||||||
|
"properties": {"copy_name": {"type": "string"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
actual_allof = dereference_refs(schema_allof)
|
||||||
|
assert actual_allof == expected_allof
|
||||||
|
|
||||||
|
# Test edge case: out-of-bounds index should raise KeyError
|
||||||
|
schema_invalid = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {"anyOf": [{"type": "string"}]},
|
||||||
|
"invalid": {"$ref": "#/properties/data/anyOf/5"}, # Index 5 doesn't exist
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(
|
||||||
|
KeyError, match="Reference '#/properties/data/anyOf/5' not found"
|
||||||
|
):
|
||||||
|
dereference_refs(schema_invalid)
|
||||||
|
|
||||||
|
# Test edge case: negative index should raise KeyError
|
||||||
|
schema_negative = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {"anyOf": [{"type": "string"}]},
|
||||||
|
"invalid": {"$ref": "#/properties/data/anyOf/-1"}, # Negative index
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(
|
||||||
|
KeyError, match="Reference '#/properties/data/anyOf/-1' not found"
|
||||||
|
):
|
||||||
|
dereference_refs(schema_negative)
|
||||||
|
|
||||||
|
# Test that existing dictionary-based numeric key functionality still works
|
||||||
|
schema_dict_key = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"error_400": {"$ref": "#/$defs/400"},
|
||||||
|
},
|
||||||
|
"$defs": {
|
||||||
|
400: {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"description": "Bad Request"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
expected_dict_key = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"error_400": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"description": "Bad Request"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"$defs": {
|
||||||
|
400: {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"description": "Bad Request"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
actual_dict_key = dereference_refs(schema_dict_key)
|
||||||
|
assert actual_dict_key == expected_dict_key
|
||||||
|
@@ -1,13 +1,11 @@
|
|||||||
from langchain_core.utils.json_schema import (
|
from langchain_core.utils.json_schema import (
|
||||||
_dereference_refs_helper,
|
_dereference_refs_helper,
|
||||||
_infer_skip_keys,
|
|
||||||
_retrieve_ref,
|
_retrieve_ref,
|
||||||
dereference_refs,
|
dereference_refs,
|
||||||
)
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"_dereference_refs_helper",
|
"_dereference_refs_helper",
|
||||||
"_infer_skip_keys",
|
|
||||||
"_retrieve_ref",
|
"_retrieve_ref",
|
||||||
"dereference_refs",
|
"dereference_refs",
|
||||||
]
|
]
|
||||||
|
Reference in New Issue
Block a user