From ab527027acfce774d7528b5baf4839e55bfbdd81 Mon Sep 17 00:00:00 2001 From: Bruno Alvisio Date: Mon, 2 Sep 2024 13:17:39 -0700 Subject: [PATCH] community: Resolve refs recursively when generating openai_fn from OpenAPI spec (#19002) - **Description:** This PR is intended to improve the generation of payloads for OpenAI functions when converting from an OpenAPI spec file. The solution is to recursively resolve `$refs`. Currently, when converting OpenAPI specs into OpenAI functions using `openapi_spec_to_openai_fn`, if the schemas have nested references, the generated functions contain unresolved `$ref` entries that cause the LLM to generate payloads with an incorrect schema. For example, for the following OpenAPI spec: ``` text = """ { "openapi": "3.0.3", "info": { "title": "Swagger Petstore - OpenAPI 3.0", "termsOfService": "http://swagger.io/terms/", "contact": { "email": "apiteam@swagger.io" }, "license": { "name": "Apache 2.0", "url": "http://www.apache.org/licenses/LICENSE-2.0.html" }, "version": "1.0.11" }, "externalDocs": { "description": "Find out more about Swagger", "url": "http://swagger.io" }, "servers": [ { "url": "https://petstore3.swagger.io/api/v3" } ], "tags": [ { "name": "pet", "description": "Everything about your Pets", "externalDocs": { "description": "Find out more", "url": "http://swagger.io" } }, { "name": "store", "description": "Access to Petstore orders", "externalDocs": { "description": "Find out more about our store", "url": "http://swagger.io" } }, { "name": "user", "description": "Operations about user" } ], "paths": { "/pet": { "post": { "tags": [ "pet" ], "summary": "Add a new pet to the store", "description": "Add a new pet to the store", "operationId": "addPet", "requestBody": { "description": "Create a new pet in the store", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Pet" } } }, "required": true }, "responses": { "200": { "description": "Successful operation", "content": { "application/json": { "schema": { "$ref": 
"#/components/schemas/Pet" } } } } } } } }, "components": { "schemas": { "Tag": { "type": "object", "properties": { "id": { "type": "integer", "format": "int64" }, "model_type": { "type": "number" } } }, "Category": { "type": "object", "required": [ "model", "year", "age" ], "properties": { "year": { "type": "integer", "format": "int64", "example": 1 }, "model": { "type": "string", "example": "Ford" }, "age": { "type": "integer", "example": 42 } } }, "Pet": { "required": [ "name" ], "type": "object", "properties": { "id": { "type": "integer", "format": "int64", "example": 10 }, "name": { "type": "string", "example": "doggie" }, "category": { "$ref": "#/components/schemas/Category" }, "tags": { "type": "array", "items": { "$ref": "#/components/schemas/Tag" } }, "status": { "type": "string", "description": "pet status in the store", "enum": [ "available", "pending", "sold" ] } } } } } } """ ``` Executing: ``` spec = OpenAPISpec.from_text(text) pet_openai_functions, pet_callables = openapi_spec_to_openai_fn(spec) response = model.invoke("Create a pet named Scott", functions=pet_openai_functions) ``` `pet_openai_functions` contains unresolved `$refs`: ``` [ { "name": "addPet", "description": "Add a new pet to the store", "parameters": { "type": "object", "properties": { "json": { "properties": { "id": { "type": "integer", "schema_format": "int64", "example": 10 }, "name": { "type": "string", "example": "doggie" }, "category": { "ref": "#/components/schemas/Category" }, "tags": { "items": { "ref": "#/components/schemas/Tag" }, "type": "array" }, "status": { "type": "string", "enum": [ "available", "pending", "sold" ], "description": "pet status in the store" } }, "type": "object", "required": [ "name", "photoUrls" ] } } } } ] ``` and the generated JSON has an incorrect schema (e.g. 
category is filled with `id` and `name` instead of `model`, `year` and `age`): ``` { "id": 1, "name": "Scott", "category": { "id": 1, "name": "Dogs" }, "tags": [ { "id": 1, "name": "tag1" } ], "status": "available" } ``` With this change, `pet_openai_functions` becomes: ``` [ { "name": "addPet", "description": "Add a new pet to the store", "parameters": { "type": "object", "properties": { "json": { "properties": { "id": { "type": "integer", "schema_format": "int64", "example": 10 }, "name": { "type": "string", "example": "doggie" }, "category": { "properties": { "year": { "type": "integer", "schema_format": "int64", "example": 1 }, "model": { "type": "string", "example": "Ford" }, "age": { "type": "integer", "example": 42 } }, "type": "object", "required": [ "model", "year", "age" ] }, "tags": { "items": { "properties": { "id": { "type": "integer", "schema_format": "int64" }, "model_type": { "type": "number" } }, "type": "object" }, "type": "array" }, "status": { "type": "string", "enum": [ "available", "pending", "sold" ], "description": "pet status in the store" } }, "type": "object", "required": [ "name" ] } } } } ] ``` and the JSON generated by the LLM is: ``` { "id": 1, "name": "Scott", "category": { "year": 2022, "model": "Dog", "age": 42 }, "tags": [ { "id": 1, "model_type": 1 } ], "status": "available" } ``` which has the intended schema. 
- **Twitter handle:** @brunoalvisio --------- Co-authored-by: Harrison Chase --- .../langchain_community/utilities/openapi.py | 24 ++++++- .../openapi_spec_nested_ref.json | 70 +++++++++++++++++++ .../unit_tests/utilities/test_openapi.py | 49 +++++++++++++ 3 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_nested_ref.json diff --git a/libs/community/langchain_community/utilities/openapi.py b/libs/community/langchain_community/utilities/openapi.py index 9660a5408d3..6bd9182713a 100644 --- a/libs/community/langchain_community/utilities/openapi.py +++ b/libs/community/langchain_community/utilities/openapi.py @@ -129,11 +129,31 @@ class OpenAPISpec(OpenAPI): raise ValueError(f"No schema found for {ref_name}") return schemas[ref_name] - def get_schema(self, schema: Union[Reference, Schema]) -> Schema: + def get_schema( + self, + schema: Union[Reference, Schema], + depth: int = 0, + max_depth: Optional[int] = None, + ) -> Schema: + if max_depth is not None and depth >= max_depth: + raise RecursionError( + f"Max depth of {max_depth} has been exceeded when resolving references." + ) + from openapi_pydantic import Reference if isinstance(schema, Reference): - return self.get_referenced_schema(schema) + schema = self.get_referenced_schema(schema) + + # TODO: Resolve references on all fields of Schema ? + # (e.g. patternProperties, etc...) 
+ if schema.properties is not None: + for p_name, p in schema.properties.items(): + schema.properties[p_name] = self.get_schema(p, depth + 1, max_depth) + + if schema.items is not None: + schema.items = self.get_schema(schema.items, depth + 1, max_depth) + return schema def _get_root_referenced_schema(self, ref: Reference) -> Schema: diff --git a/libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_nested_ref.json b/libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_nested_ref.json new file mode 100644 index 00000000000..18190feda3d --- /dev/null +++ b/libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_nested_ref.json @@ -0,0 +1,70 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Swagger Petstore - OpenAPI 3.0", + "license": { + "name": "Apache 2.0", + "url": "http://www.apache.org/licenses/LICENSE-2.0.html" + }, + "version": "1.0.11" + }, + "paths": { + "/pet": { + "post": { + "summary": "Add a new pet to the store", + "description": "Add a new pet to the store", + "operationId": "addPet", + "requestBody": { + "description": "Create a new pet in the store", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Pet" + } + } + }, + "required": true + } + } + } + }, + "components": { + "schemas": { + "Tag": { + "type": "object", + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "model_type": { + "type": "number" + } + } + }, + "Pet": { + "required": [ + "name" + ], + "type": "object", + "properties": { + "id": { + "type": "integer", + "format": "int64", + "example": 10 + }, + "name": { + "type": "string", + "example": "doggie" + }, + "tags": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tag" + } + } + } + } + } + } + } \ No newline at end of file diff --git a/libs/community/tests/unit_tests/utilities/test_openapi.py b/libs/community/tests/unit_tests/utilities/test_openapi.py index e7e8b745573..377c86ad56e 100644 --- 
a/libs/community/tests/unit_tests/utilities/test_openapi.py +++ b/libs/community/tests/unit_tests/utilities/test_openapi.py @@ -42,3 +42,52 @@ def test_header_param() -> None: openai_functions, _ = openapi_spec_to_openai_fn(spec) assert openai_functions == EXPECTED_OPENAI_FUNCTIONS_HEADER_PARAM + + +EXPECTED_OPENAI_FUNCTIONS_NESTED_REF = [ + { + "name": "addPet", + "description": "Add a new pet to the store", + "parameters": { + "type": "object", + "properties": { + "json": { + "properties": { + "id": { + "type": "integer", + "schema_format": "int64", + "example": 10, + }, + "name": {"type": "string", "example": "doggie"}, + "tags": { + "items": { + "properties": { + "id": {"type": "integer", "schema_format": "int64"}, + "model_type": {"type": "number"}, + }, + "type": "object", + }, + "type": "array", + }, + }, + "type": "object", + "required": ["name"], + } + }, + }, + } +] + + +@pytest.mark.requires("openapi_pydantic") +def test_nested_ref_in_openapi_spec() -> None: + spec = OpenAPISpec.from_file( + Path(__file__).parent.parent + / "data" + / "openapi_specs" + / "openapi_spec_nested_ref.json", + ) + + openai_functions, _ = openapi_spec_to_openai_fn(spec) + + assert openai_functions == EXPECTED_OPENAI_FUNCTIONS_NESTED_REF