From 7afecec280bc1aa252d2d81da2f6026302d3c22e Mon Sep 17 00:00:00 2001 From: Roshan Santhosh Date: Mon, 18 Mar 2024 21:25:06 -0700 Subject: [PATCH] core: update _rm_titles to account for title argument name bug (#19036) Issue : For functions which have an argument with the name 'title', the convert_pydantic_to_openai_function generates an incorrect output and omits the argument altogether. This is because the _rm_titles function removes all instances of the key 'title' from the output. Description : Updates the _rm_titles function to check for the presence of the 'type' key as well before removing the 'title' key, as the title key that we wish to omit always has a type key along with it. Potential gap if there is a function defined which has both title and key as argument names, in which case this would fail. Maybe we could set a filter on the function argument names and reject those with keyword argument names. No dependencies. Passed all tests. - [x] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [x] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. 
See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17. --------- Co-authored-by: Eugene Yurtsev --- .../langchain_core/utils/function_calling.py | 10 +- .../tests/unit_tests/utils/test_rm_titles.py | 199 ++++++++++++++++++ 2 files changed, 206 insertions(+), 3 deletions(-) create mode 100644 libs/core/tests/unit_tests/utils/test_rm_titles.py diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index a0cadca4f33..5977615654c 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -1,4 +1,5 @@ """Methods for creating function specs in the style of OpenAI Functions""" + from __future__ import annotations import inspect @@ -51,13 +52,16 @@ class ToolDescription(TypedDict): function: FunctionDescription -def _rm_titles(kv: dict) -> dict: +def _rm_titles(kv: dict, prev_key: str = "") -> dict: new_kv = {} for k, v in kv.items(): if k == "title": - continue + if isinstance(v, dict) and prev_key == "properties" and "title" in v.keys(): + new_kv[k] = _rm_titles(v, k) + else: + continue elif isinstance(v, dict): - new_kv[k] = _rm_titles(v) + new_kv[k] = _rm_titles(v, k) else: new_kv[k] = v return new_kv diff --git a/libs/core/tests/unit_tests/utils/test_rm_titles.py b/libs/core/tests/unit_tests/utils/test_rm_titles.py new file mode 100644 index 00000000000..731510dd873 --- /dev/null +++ 
b/libs/core/tests/unit_tests/utils/test_rm_titles.py @@ -0,0 +1,199 @@ +import pytest + +from langchain_core.utils.function_calling import _rm_titles + +output1 = { + "type": "object", + "properties": { + "people": { + "description": "List of info about people", + "type": "array", + "items": { + "description": "Information about a person.", + "type": "object", + "properties": { + "name": {"type": "string"}, + "title": {"description": "person's age", "type": "integer"}, + }, + "required": ["name"], + }, + } + }, + "required": ["people"], +} + +schema1 = { + "type": "object", + "properties": { + "people": { + "title": "People", + "description": "List of info about people", + "type": "array", + "items": { + "title": "Person", + "description": "Information about a person.", + "type": "object", + "properties": { + "name": {"title": "Name", "type": "string"}, + "title": { + "title": "Title", + "description": "person's age", + "type": "integer", + }, + }, + "required": ["name"], + }, + } + }, + "required": ["people"], +} + +output2 = { + "type": "object", + "properties": { + "title": { + "description": "List of info about people", + "type": "array", + "items": { + "description": "Information about a person.", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"description": "person's age", "type": "integer"}, + }, + "required": ["name"], + }, + } + }, + "required": ["title"], +} + +schema2 = { + "type": "object", + "properties": { + "title": { + "title": "Title", + "description": "List of info about people", + "type": "array", + "items": { + "title": "Person", + "description": "Information about a person.", + "type": "object", + "properties": { + "name": {"title": "Name", "type": "string"}, + "age": { + "title": "Age", + "description": "person's age", + "type": "integer", + }, + }, + "required": ["name"], + }, + } + }, + "required": ["title"], +} + + +output3 = { + "type": "object", + "properties": { + "title": { + "description": "List of info 
about people", + "type": "array", + "items": { + "description": "Information about a person.", + "type": "object", + "properties": { + "title": {"type": "string"}, + "type": {"description": "person's age", "type": "integer"}, + }, + "required": ["title"], + }, + } + }, + "required": ["title"], +} + +schema3 = { + "type": "object", + "properties": { + "title": { + "title": "Title", + "description": "List of info about people", + "type": "array", + "items": { + "title": "Person", + "description": "Information about a person.", + "type": "object", + "properties": { + "title": {"title": "Title", "type": "string"}, + "type": { + "title": "Type", + "description": "person's age", + "type": "integer", + }, + }, + "required": ["title"], + }, + } + }, + "required": ["title"], +} + + +output4 = { + "type": "object", + "properties": { + "properties": { + "description": "Information to extract", + "type": "object", + "properties": { + "title": { + "description": "Information about papers mentioned.", + "type": "object", + "properties": { + "title": {"type": "string"}, + "author": {"type": "string"}, + }, + "required": ["title"], + } + }, + "required": ["title"], + } + }, + "required": ["properties"], +} + +schema4 = { + "type": "object", + "properties": { + "properties": { + "title": "Info", + "description": "Information to extract", + "type": "object", + "properties": { + "title": { + "title": "Paper", + "description": "Information about papers mentioned.", + "type": "object", + "properties": { + "title": {"title": "Title", "type": "string"}, + "author": {"title": "Author", "type": "string"}, + }, + "required": ["title"], + } + }, + "required": ["title"], + } + }, + "required": ["properties"], +} + + +@pytest.mark.parametrize( + "schema, output", + [(schema1, output1), (schema2, output2), (schema3, output3), (schema4, output4)], +) +def test_rm_titles(schema: dict, output: dict) -> None: + assert _rm_titles(schema) == output