From eb25d7472d969976859d677c74a2188dcec85ac4 Mon Sep 17 00:00:00 2001 From: OysterMax Date: Fri, 16 May 2025 13:20:32 -0700 Subject: [PATCH] core: support `Union` type args in strict mode of OpenAI function calling / structured output (#30971) **Issue:** [#30970](https://github.com/langchain-ai/langchain/issues/30970) **Cause:** An argument type in Python code ``` arg: Union[SubSchema1, SubSchema2] ``` is translated to `anyOf` in **JSON schema** ``` "anyOf" : [{sub schema 1 ...}, {sub schema 2 ...}] ``` The value of `anyOf` is a list of sub schemas. The bug occurs because the sub schemas inside the `anyOf` list are not handled. The issue is located in the `convert_to_openai_function` function -> `_recursive_set_additional_properties_false` function, which recursively adds `"additionalProperties": false` to the JSON schema, as [required by OpenAI's strict function calling](https://platform.openai.com/docs/guides/structured-outputs?api-mode=responses#additionalproperties-false-must-always-be-set-in-objects). **Solution:** This PR fixes the issue by iterating over each sub schema inside the `anyOf` list. A unit test is added. **Twitter handle:** shengboma If no one reviews your PR within a few days, please @-mention one of baskaryan, eyurtsev, ccurme, vbarda, hwchase17. 
--------- Co-authored-by: ccurme --- .../langchain_core/utils/function_calling.py | 7 ++- .../unit_tests/utils/test_function_calling.py | 55 +++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index c664ec03e3d..aa288bedc53 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -788,9 +788,12 @@ def _recursive_set_additional_properties_false( schema["additionalProperties"] = False # Recursively check 'properties' and 'items' if they exist + if "anyOf" in schema: + for sub_schema in schema["anyOf"]: + _recursive_set_additional_properties_false(sub_schema) if "properties" in schema: - for value in schema["properties"].values(): - _recursive_set_additional_properties_false(value) + for sub_schema in schema["properties"].values(): + _recursive_set_additional_properties_false(sub_schema) if "items" in schema: _recursive_set_additional_properties_false(schema["items"]) diff --git a/libs/core/tests/unit_tests/utils/test_function_calling.py b/libs/core/tests/unit_tests/utils/test_function_calling.py index 126c1753f4e..f75ae304937 100644 --- a/libs/core/tests/unit_tests/utils/test_function_calling.py +++ b/libs/core/tests/unit_tests/utils/test_function_calling.py @@ -497,6 +497,61 @@ def test_convert_to_openai_function_nested_strict() -> None: assert actual == expected +def test_convert_to_openai_function_strict_union_of_objects_arg_type() -> None: + class NestedA(BaseModel): + foo: str + + class NestedB(BaseModel): + bar: int + + class NestedC(BaseModel): + baz: bool + + def my_function(my_arg: Union[NestedA, NestedB, NestedC]) -> None: + """Dummy function.""" + + expected = { + "name": "my_function", + "description": "Dummy function.", + "parameters": { + "properties": { + "my_arg": { + "anyOf": [ + { + "properties": {"foo": {"title": "Foo", "type": "string"}}, + 
"required": ["foo"], + "title": "NestedA", + "type": "object", + "additionalProperties": False, + }, + { + "properties": {"bar": {"title": "Bar", "type": "integer"}}, + "required": ["bar"], + "title": "NestedB", + "type": "object", + "additionalProperties": False, + }, + { + "properties": {"baz": {"title": "Baz", "type": "boolean"}}, + "required": ["baz"], + "title": "NestedC", + "type": "object", + "additionalProperties": False, + }, + ] + } + }, + "required": ["my_arg"], + "type": "object", + "additionalProperties": False, + }, + "strict": True, + } + + actual = convert_to_openai_function(my_function, strict=True) + assert actual == expected + + json_schema_no_description_no_params = { "title": "dummy_function", }