From eb9b992aa69553bc1c49ec681809d9559dedb758 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 17 Mar 2025 12:02:21 -0400 Subject: [PATCH] openai[patch]: support additional Responses API features (#30322) - Include response headers - Max tokens - Reasoning effort - Fix bug with structured output / strict - Fix bug with simultaneous tool calling + structured output --- .../langchain_openai/chat_models/base.py | 117 ++++++++++---- libs/partners/openai/pyproject.toml | 2 +- .../chat_models/test_azure_standard.py | 2 +- .../chat_models/test_base.py | 151 +++++++++++------- .../chat_models/test_responses_api.py | 49 +++++- libs/partners/openai/uv.lock | 10 +- 6 files changed, 239 insertions(+), 92 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 4f0e9b3f805..41cd16d6daa 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -750,18 +750,29 @@ class BaseChatOpenAI(BaseChatModel): ) -> Iterator[ChatGenerationChunk]: kwargs["stream"] = True payload = self._get_request_payload(messages, stop=stop, **kwargs) - context_manager = self.root_client.responses.create(**payload) + if self.include_response_headers: + raw_context_manager = self.root_client.with_raw_response.responses.create( + **payload + ) + context_manager = raw_context_manager.parse() + headers = {"headers": dict(raw_context_manager.headers)} + else: + context_manager = self.root_client.responses.create(**payload) + headers = {} original_schema_obj = kwargs.get("response_format") with context_manager as response: + is_first_chunk = True for chunk in response: + metadata = headers if is_first_chunk else {} if generation_chunk := _convert_responses_chunk_to_generation_chunk( - chunk, schema=original_schema_obj + chunk, schema=original_schema_obj, metadata=metadata ): if run_manager: run_manager.on_llm_new_token( generation_chunk.text, chunk=generation_chunk ) + is_first_chunk = False yield generation_chunk async def _astream_responses( @@ -773,18 +784,31 @@ class BaseChatOpenAI(BaseChatModel): ) -> AsyncIterator[ChatGenerationChunk]: kwargs["stream"] = True payload = self._get_request_payload(messages, stop=stop, **kwargs) - context_manager = await self.root_async_client.responses.create(**payload) + if self.include_response_headers: + raw_context_manager = ( + await self.root_async_client.with_raw_response.responses.create( + **payload + ) + ) + context_manager = raw_context_manager.parse() + headers = {"headers": dict(raw_context_manager.headers)} + else: + context_manager = await self.root_async_client.responses.create(**payload) + headers = {} original_schema_obj = kwargs.get("response_format") async with context_manager as response: + is_first_chunk = True async for chunk in response: + metadata = headers if is_first_chunk else {} if generation_chunk := _convert_responses_chunk_to_generation_chunk( - chunk, schema=original_schema_obj + chunk, schema=original_schema_obj, metadata=metadata ): if run_manager: await run_manager.on_llm_new_token( generation_chunk.text, chunk=generation_chunk ) + is_first_chunk = False yield generation_chunk def _stream( @@ -877,19 +901,26 @@ class BaseChatOpenAI(BaseChatModel): response = self.root_client.beta.chat.completions.parse(**payload) except openai.BadRequestError as e: _handle_openai_bad_request(e) - elif self.include_response_headers: - raw_response = self.client.with_raw_response.create(**payload) - response = 
raw_response.parse() - generation_info = {"headers": dict(raw_response.headers)} elif self._use_responses_api(payload): original_schema_obj = kwargs.get("response_format") if original_schema_obj and _is_pydantic_class(original_schema_obj): response = self.root_client.responses.parse(**payload) else: - response = self.root_client.responses.create(**payload) + if self.include_response_headers: + raw_response = self.root_client.with_raw_response.responses.create( + **payload + ) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} + else: + response = self.root_client.responses.create(**payload) return _construct_lc_result_from_responses_api( - response, schema=original_schema_obj + response, schema=original_schema_obj, metadata=generation_info ) + elif self.include_response_headers: + raw_response = self.client.with_raw_response.create(**payload) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} else: response = self.client.create(**payload) return self._create_chat_result(response, generation_info) @@ -1065,20 +1096,28 @@ class BaseChatOpenAI(BaseChatModel): ) except openai.BadRequestError as e: _handle_openai_bad_request(e) - elif self.include_response_headers: - raw_response = await self.async_client.with_raw_response.create(**payload) - response = raw_response.parse() - generation_info = {"headers": dict(raw_response.headers)} elif self._use_responses_api(payload): original_schema_obj = kwargs.get("response_format") if original_schema_obj and _is_pydantic_class(original_schema_obj): response = await self.root_async_client.responses.parse(**payload) else: - response = await self.root_async_client.responses.create(**payload) + if self.include_response_headers: + raw_response = ( + await self.root_async_client.with_raw_response.responses.create( + **payload + ) + ) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} + else: + response = await self.root_async_client.responses.create(**payload) return _construct_lc_result_from_responses_api( - response, schema=original_schema_obj + response, schema=original_schema_obj, metadata=generation_info ) - + elif self.include_response_headers: + raw_response = await self.async_client.with_raw_response.create(**payload) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} else: response = await self.async_client.create(**payload) return await run_in_executor( @@ -2834,6 +2873,13 @@ def _use_responses_api(payload: dict) -> bool: def _construct_responses_api_payload( messages: Sequence[BaseMessage], payload: dict ) -> dict: + # Rename legacy parameters + for legacy_token_param in ["max_tokens", "max_completion_tokens"]: + if legacy_token_param in payload: + payload["max_output_tokens"] = payload.pop(legacy_token_param) + if "reasoning_effort" in payload: + payload["reasoning"] = {"effort": payload.pop("reasoning_effort")} + payload["input"] = _construct_responses_api_input(messages) if tools := payload.pop("tools", None): new_tools: list = [] @@ -2868,17 +2914,23 @@ def _construct_responses_api_payload( # For pydantic + non-streaming case, we use responses.parse. # Otherwise, we use responses.create. 
+ strict = payload.pop("strict", None) if not payload.get("stream") and _is_pydantic_class(schema): payload["text_format"] = schema else: if _is_pydantic_class(schema): schema_dict = schema.model_json_schema() + strict = True else: schema_dict = schema if schema_dict == {"type": "json_object"}: # JSON mode payload["text"] = {"format": {"type": "json_object"}} elif ( - (response_format := _convert_to_openai_response_format(schema_dict)) + ( + response_format := _convert_to_openai_response_format( + schema_dict, strict=strict + ) + ) and (isinstance(response_format, dict)) and (response_format["type"] == "json_schema") ): @@ -2993,7 +3045,9 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: def _construct_lc_result_from_responses_api( - response: Response, schema: Optional[Type[_BM]] = None + response: Response, + schema: Optional[Type[_BM]] = None, + metadata: Optional[dict] = None, ) -> ChatResult: """Construct ChatResponse from OpenAI Response API response.""" if response.error: @@ -3014,6 +3068,8 @@ def _construct_lc_result_from_responses_api( "model", ) } + if metadata: + response_metadata.update(metadata) # for compatibility with chat completion calls. response_metadata["model_name"] = response_metadata.get("model") if response.usage: @@ -3099,17 +3155,21 @@ def _construct_lc_result_from_responses_api( if ( schema is not None and "parsed" not in additional_kwargs + and response.output_text # tool calls can generate empty output text and response.text and (text_config := response.text.model_dump()) and (format_ := text_config.get("format", {})) and (format_.get("type") == "json_schema") ): - parsed_dict = json.loads(response.output_text) - if schema and _is_pydantic_class(schema): - parsed = schema(**parsed_dict) - else: - parsed = parsed_dict - additional_kwargs["parsed"] = parsed + try: + parsed_dict = json.loads(response.output_text) + if schema and _is_pydantic_class(schema): + parsed = schema(**parsed_dict) + else: + parsed = parsed_dict + additional_kwargs["parsed"] = parsed + except json.JSONDecodeError: + pass message = AIMessage( content=content_blocks, id=msg_id, @@ -3123,12 +3183,15 @@ def _construct_lc_result_from_responses_api( def _convert_responses_chunk_to_generation_chunk( - chunk: Any, schema: Optional[Type[_BM]] = None + chunk: Any, schema: Optional[Type[_BM]] = None, metadata: Optional[dict] = None ) -> Optional[ChatGenerationChunk]: content = [] tool_call_chunks: list = [] additional_kwargs: dict = {} - response_metadata = {} + if metadata: + response_metadata = metadata + else: + response_metadata = {} usage_metadata = None id = None if chunk.type == "response.output_text.delta": diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index 4b4939d1032..fbc921278cd 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -8,7 +8,7 @@ license = { text = "MIT" } requires-python = "<4.0,>=3.9" dependencies = [ "langchain-core<1.0.0,>=0.3.45-rc.1", - "openai<2.0.0,>=1.66.0", + "openai<2.0.0,>=1.66.3", "tiktoken<1,>=0.7", ] name = "langchain-openai" diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py index f0697509347..f5820794bb3 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py @@ -22,7 +22,7 @@ class 
TestAzureOpenAIStandard(ChatModelIntegrationTests): def chat_model_params(self) -> dict: return { "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], - "model": "gpt-4o", + "model": "gpt-4o-mini", "openai_api_version": OPENAI_API_VERSION, "azure_endpoint": OPENAI_API_BASE, } diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 09cae79520b..903b4fd88ed 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -31,6 +31,8 @@ from pydantic import BaseModel, Field from langchain_openai import ChatOpenAI from tests.unit_tests.fake.callbacks import FakeCallbackHandler +MAX_TOKEN_COUNT = 16 + @pytest.mark.scheduled def test_chat_openai() -> None: @@ -44,7 +46,7 @@ def test_chat_openai() -> None: max_retries=3, http_client=None, n=1, - max_completion_tokens=10, + max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg] default_headers=None, default_query=None, ) @@ -62,20 +64,21 @@ def test_chat_openai_model() -> None: assert chat.model_name == "bar" -def test_chat_openai_system_message() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_chat_openai_system_message(use_responses_api: bool) -> None: """Test ChatOpenAI wrapper with system message.""" - chat = ChatOpenAI(max_completion_tokens=10) + chat = ChatOpenAI(use_responses_api=use_responses_api, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] system_message = SystemMessage(content="You are to chat with the user.") human_message = HumanMessage(content="Hello") response = chat.invoke([system_message, human_message]) assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) + assert isinstance(response.text(), str) @pytest.mark.scheduled def test_chat_openai_generate() -> None: """Test ChatOpenAI wrapper with generate.""" - chat = ChatOpenAI(max_completion_tokens=10, n=2) + chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, n=2) # type: ignore[call-arg] message = HumanMessage(content="Hello") response = chat.generate([[message], [message]]) assert isinstance(response, LLMResult) @@ -92,7 +95,7 @@ def test_chat_openai_generate() -> None: @pytest.mark.scheduled def test_chat_openai_multiple_completions() -> None: """Test ChatOpenAI wrapper with multiple completions.""" - chat = ChatOpenAI(max_completion_tokens=10, n=5) + chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, n=5) # type: ignore[call-arg] message = HumanMessage(content="Hello") response = chat._generate([message]) assert isinstance(response, ChatResult) @@ -103,16 +106,18 @@ def test_chat_openai_multiple_completions() -> None: @pytest.mark.scheduled -def test_chat_openai_streaming() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_chat_openai_streaming(use_responses_api: bool) -> None: """Test that streaming correctly invokes on_llm_new_token callback.""" callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) chat = ChatOpenAI( - max_completion_tokens=10, + max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg] streaming=True, temperature=0, callback_manager=callback_manager, verbose=True, + use_responses_api=use_responses_api, ) message = HumanMessage(content="Hello") response = chat.invoke([message]) @@ -133,9 +138,7 @@ def test_chat_openai_streaming_generation_info() -> None: callback = _FakeCallback() callback_manager = 
CallbackManager([callback]) - chat = ChatOpenAI( - max_completion_tokens=2, temperature=0, callback_manager=callback_manager - ) + chat = ChatOpenAI(max_tokens=2, temperature=0, callback_manager=callback_manager) # type: ignore[call-arg] list(chat.stream("hi")) generation = callback.saved_things["generation"] # `Hello!` is two tokens, assert that that is what is returned @@ -144,7 +147,7 @@ def test_chat_openai_streaming_generation_info() -> None: def test_chat_openai_llm_output_contains_model_name() -> None: """Test llm_output contains model_name.""" - chat = ChatOpenAI(max_completion_tokens=10) + chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] message = HumanMessage(content="Hello") llm_result = chat.generate([[message]]) assert llm_result.llm_output is not None @@ -153,7 +156,7 @@ def test_chat_openai_llm_output_contains_model_name() -> None: def test_chat_openai_streaming_llm_output_contains_model_name() -> None: """Test llm_output contains model_name.""" - chat = ChatOpenAI(max_completion_tokens=10, streaming=True) + chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, streaming=True) # type: ignore[call-arg] message = HumanMessage(content="Hello") llm_result = chat.generate([[message]]) assert llm_result.llm_output is not None @@ -163,13 +166,13 @@ def test_chat_openai_streaming_llm_output_contains_model_name() -> None: def test_chat_openai_invalid_streaming_params() -> None: """Test that streaming correctly invokes on_llm_new_token callback.""" with pytest.raises(ValueError): - ChatOpenAI(max_completion_tokens=10, streaming=True, temperature=0, n=5) + ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, streaming=True, temperature=0, n=5) # type: ignore[call-arg] @pytest.mark.scheduled async def test_async_chat_openai() -> None: """Test async generation.""" - chat = ChatOpenAI(max_completion_tokens=10, n=2) + chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, n=2) # type: ignore[call-arg] message = HumanMessage(content="Hello") response = await chat.agenerate([[message], [message]]) assert isinstance(response, LLMResult) @@ -189,7 +192,7 @@ async def test_async_chat_openai_streaming() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) chat = ChatOpenAI( - max_completion_tokens=10, + max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg] streaming=True, temperature=0, callback_manager=callback_manager, @@ -221,7 +224,7 @@ async def test_async_chat_openai_bind_functions() -> None: default=None, title="Fav Food", description="The person's favorite food" ) - chat = ChatOpenAI(max_completion_tokens=30, n=1, streaming=True).bind_functions( + chat = ChatOpenAI(max_tokens=30, n=1, streaming=True).bind_functions( # type: ignore[call-arg] functions=[Person], function_call="Person" ) @@ -243,7 +246,7 @@ async def test_async_chat_openai_bind_functions() -> None: @pytest.mark.scheduled def test_openai_streaming() -> None: """Test streaming tokens from OpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] for token in llm.stream("I'm Pickle Rick"): assert isinstance(token.content, str) @@ -252,7 +255,7 @@ def test_openai_streaming() -> None: @pytest.mark.scheduled async def test_openai_astream() -> None: """Test streaming tokens from OpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] async for token in llm.astream("I'm Pickle Rick"): assert isinstance(token.content, str) @@ -261,7 +264,7 @@ async def 
test_openai_astream() -> None: @pytest.mark.scheduled async def test_openai_abatch() -> None: """Test streaming tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) for token in result: @@ -269,21 +272,22 @@ async def test_openai_abatch() -> None: @pytest.mark.scheduled -async def test_openai_abatch_tags() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +async def test_openai_abatch_tags(use_responses_api: bool) -> None: """Test batch tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, use_responses_api=use_responses_api) # type: ignore[call-arg] result = await llm.abatch( ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} ) for token in result: - assert isinstance(token.content, str) + assert isinstance(token.text(), str) @pytest.mark.scheduled def test_openai_batch() -> None: """Test batch tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) for token in result: @@ -293,7 +297,7 @@ def test_openai_batch() -> None: @pytest.mark.scheduled async def test_openai_ainvoke() -> None: """Test invoke tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) assert isinstance(result.content, str) @@ -302,7 +306,7 @@ async def test_openai_ainvoke() -> None: @pytest.mark.scheduled def test_openai_invoke() -> None: """Test invoke tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_completion_tokens=10) + llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) assert isinstance(result.content, str) @@ -387,7 +391,7 @@ async def test_astream() -> None: assert chunks_with_token_counts == 0 assert full.usage_metadata is None - llm = ChatOpenAI(temperature=0, max_completion_tokens=5) + llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] await _test_stream(llm.astream("Hello"), expect_usage=False) await _test_stream( llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True @@ -395,7 +399,7 @@ async def test_astream() -> None: await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True) llm = ChatOpenAI( temperature=0, - max_completion_tokens=5, + max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg] model_kwargs={"stream_options": {"include_usage": True}}, ) await _test_stream(llm.astream("Hello"), expect_usage=True) @@ -403,7 +407,7 @@ async def test_astream() -> None: llm.astream("Hello", stream_options={"include_usage": False}), expect_usage=False, ) - llm = ChatOpenAI(temperature=0, max_completion_tokens=5, stream_usage=True) + llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True) # type: ignore[call-arg] await _test_stream(llm.astream("Hello"), expect_usage=True) await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False) @@ -572,9 +576,12 @@ def test_tool_use() -> None: llm_with_tool.invoke(msgs) -def test_manual_tool_call_msg() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +def 
test_manual_tool_call_msg(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" - llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) + llm = ChatOpenAI( + model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api + ) llm_with_tool = llm.bind_tools(tools=[GenerateUsername]) msgs: List = [ HumanMessage("Sally has green hair, what would her username be?"), @@ -615,9 +622,12 @@ def test_manual_tool_call_msg() -> None: llm_with_tool.invoke(msgs) -def test_bind_tools_tool_choice() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_bind_tools_tool_choice(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" - llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) + llm = ChatOpenAI( + model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api + ) for tool_choice in ("any", "required"): llm_with_tools = llm.bind_tools( tools=[GenerateUsername, MakeASandwich], tool_choice=tool_choice @@ -677,11 +687,14 @@ def test_openai_proxy() -> None: assert proxy.port == 8080 -def test_openai_response_headers() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_openai_response_headers(use_responses_api: bool) -> None: """Test ChatOpenAI response headers.""" - chat_openai = ChatOpenAI(include_response_headers=True) + chat_openai = ChatOpenAI( + include_response_headers=True, use_responses_api=use_responses_api + ) query = "I'm Pickle Rick" - result = chat_openai.invoke(query, max_completion_tokens=10) + result = chat_openai.invoke(query, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] headers = result.response_metadata["headers"] assert headers assert isinstance(headers, dict) @@ -689,7 +702,7 @@ def test_openai_response_headers() -> None: # Stream full: Optional[BaseMessageChunk] = None - for chunk in chat_openai.stream(query, max_completion_tokens=10): + for chunk in chat_openai.stream(query, max_tokens=MAX_TOKEN_COUNT): # type: ignore[call-arg] full = chunk if full is None else full + chunk assert isinstance(full, AIMessage) headers = full.response_metadata["headers"] @@ -698,11 +711,14 @@ def test_openai_response_headers() -> None: assert "content-type" in headers -async def test_openai_response_headers_async() -> None: +@pytest.mark.parametrize("use_responses_api", [False, True]) +async def test_openai_response_headers_async(use_responses_api: bool) -> None: """Test ChatOpenAI response headers.""" - chat_openai = ChatOpenAI(include_response_headers=True) + chat_openai = ChatOpenAI( + include_response_headers=True, use_responses_api=use_responses_api + ) query = "I'm Pickle Rick" - result = await chat_openai.ainvoke(query, max_completion_tokens=10) + result = await chat_openai.ainvoke(query, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] headers = result.response_metadata["headers"] assert headers assert isinstance(headers, dict) @@ -710,7 +726,7 @@ async def test_openai_response_headers_async() -> None: # Stream full: Optional[BaseMessageChunk] = None - async for chunk in chat_openai.astream(query, max_completion_tokens=10): + async for chunk in chat_openai.astream(query, max_tokens=MAX_TOKEN_COUNT): # type: ignore[call-arg] full = chunk if full is None else full + chunk assert isinstance(full, AIMessage) headers = full.response_metadata["headers"] @@ -795,7 +811,8 @@ def test_image_token_counting_png() -> None: assert expected == actual -def test_tool_calling_strict() -> None: 
+@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_tool_calling_strict(use_responses_api: bool) -> None: """Test tool calling with strict=True.""" class magic_function(BaseModel): @@ -803,7 +820,9 @@ def test_tool_calling_strict() -> None: input: int - model = ChatOpenAI(model="gpt-4o", temperature=0) + model = ChatOpenAI( + model="gpt-4o", temperature=0, use_responses_api=use_responses_api + ) model_with_tools = model.bind_tools([magic_function], strict=True) # invalid_magic_function adds metadata to schema that isn't supported by OpenAI. @@ -832,19 +851,22 @@ def test_tool_calling_strict() -> None: next(model_with_invalid_tool_schema.stream(query)) +@pytest.mark.parametrize("use_responses_api", [False, True]) @pytest.mark.parametrize( ("model", "method"), [("gpt-4o", "function_calling"), ("gpt-4o-2024-08-06", "json_schema")], ) def test_structured_output_strict( - model: str, method: Literal["function_calling", "json_schema"] + model: str, + method: Literal["function_calling", "json_schema"], + use_responses_api: bool, ) -> None: """Test to verify structured output with strict=True.""" from pydantic import BaseModel as BaseModelProper from pydantic import Field as FieldProper - llm = ChatOpenAI(model=model) + llm = ChatOpenAI(model=model, use_responses_api=use_responses_api) class Joke(BaseModelProper): """Joke to tell user.""" @@ -898,15 +920,16 @@ def test_structured_output_strict( next(chat.stream("Tell me a joke about cats.")) +@pytest.mark.parametrize("use_responses_api", [False, True]) @pytest.mark.parametrize(("model", "method"), [("gpt-4o-2024-08-06", "json_schema")]) def test_nested_structured_output_strict( - model: str, method: Literal["json_schema"] + model: str, method: Literal["json_schema"], use_responses_api: bool ) -> None: """Test to verify structured output with strict=True for nested object.""" from typing import TypedDict - llm = ChatOpenAI(model=model, temperature=0) + llm = ChatOpenAI(model=model, temperature=0, use_responses_api=use_responses_api) class SelfEvaluation(TypedDict): score: int @@ -1124,12 +1147,20 @@ def test_prediction_tokens() -> None: assert output_token_details["rejected_prediction_tokens"] > 0 -def test_stream_o1() -> None: - list(ChatOpenAI(model="o1-mini").stream("how are you")) +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_stream_o_series(use_responses_api: bool) -> None: + list( + ChatOpenAI(model="o3-mini", use_responses_api=use_responses_api).stream( + "how are you" + ) + ) -async def test_astream_o1() -> None: - async for _ in ChatOpenAI(model="o1-mini").astream("how are you"): +@pytest.mark.parametrize("use_responses_api", [False, True]) +async def test_astream_o_series(use_responses_api: bool) -> None: + async for _ in ChatOpenAI( + model="o3-mini", use_responses_api=use_responses_api + ).astream("how are you"): pass @@ -1171,21 +1202,27 @@ async def test_astream_response_format() -> None: assert parsed.response == parsed_content["response"] +@pytest.mark.parametrize("use_responses_api", [False, True]) @pytest.mark.parametrize("use_max_completion_tokens", [True, False]) -def test_o1(use_max_completion_tokens: bool) -> None: +def test_o1(use_max_completion_tokens: bool, use_responses_api: bool) -> None: if use_max_completion_tokens: - kwargs: dict = {"max_completion_tokens": 10} + kwargs: dict = {"max_completion_tokens": MAX_TOKEN_COUNT} else: - kwargs = {"max_tokens": 10} - response = ChatOpenAI(model="o1", reasoning_effort="low", **kwargs).invoke( + kwargs = {"max_tokens": MAX_TOKEN_COUNT} 
+ response = ChatOpenAI( + model="o1", + reasoning_effort="low", + use_responses_api=use_responses_api, + **kwargs, + ).invoke( [ {"role": "developer", "content": "respond in all caps"}, {"role": "user", "content": "HOW ARE YOU"}, ] ) assert isinstance(response, AIMessage) - assert isinstance(response.content, str) - assert response.content.upper() == response.content + assert isinstance(response.text(), str) + assert response.text().upper() == response.text() @pytest.mark.scheduled diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index a9e4c3ca20b..fd4a6665761 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -4,6 +4,7 @@ import json import os from typing import Any, Optional, cast +import openai import pytest from langchain_core.messages import ( AIMessage, @@ -12,7 +13,7 @@ from langchain_core.messages import ( BaseMessageChunk, ) from pydantic import BaseModel -from typing_extensions import TypedDict +from typing_extensions import Annotated, TypedDict from langchain_openai import ChatOpenAI @@ -81,6 +82,15 @@ def test_web_search() -> None: # Manually pass in chat history response = llm.invoke( [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What was a positive news story from today?", + } + ], + }, first_response, { "role": "user", @@ -206,6 +216,31 @@ def test_parsed_dict_schema(schema: Any) -> None: assert parsed["response"] and isinstance(parsed["response"], str) +def test_parsed_strict() -> None: + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + + class InvalidJoke(TypedDict): + setup: Annotated[str, ..., "The setup of the joke"] + punchline: Annotated[str, None, "The punchline of the joke"] + + # Test not strict + response = llm.invoke("Tell me a joke", response_format=InvalidJoke) + parsed = json.loads(response.text()) + assert parsed == response.additional_kwargs["parsed"] + + # Test strict + with pytest.raises(openai.BadRequestError): + llm.invoke( + "Tell me a joke about cats.", response_format=InvalidJoke, strict=True + ) + with pytest.raises(openai.BadRequestError): + next( + llm.stream( + "Tell me a joke about cats.", response_format=InvalidJoke, strict=True + ) + ) + + @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def test_parsed_dict_schema_async(schema: Any) -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) @@ -245,6 +280,18 @@ def test_function_calling_and_structured_output() -> None: assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} +def test_reasoning() -> None: + llm = ChatOpenAI(model="o3-mini", use_responses_api=True) + response = llm.invoke("Hello", reasoning={"effort": "low"}) + assert isinstance(response, AIMessage) + assert response.additional_kwargs["reasoning"] + + llm = ChatOpenAI(model="o3-mini", reasoning_effort="low", use_responses_api=True) + response = llm.invoke("Hello") + assert isinstance(response, AIMessage) + assert response.additional_kwargs["reasoning"] + + def test_stateful_api() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index b79bdca9765..0103fcf2394 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -462,7 +462,7 @@ wheels = [ 
[[package]] name = "langchain-core" -version = "0.3.45rc1" +version = "0.3.45" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -566,7 +566,7 @@ typing = [ [package.metadata] requires-dist = [ { name = "langchain-core", editable = "../../core" }, - { name = "openai", specifier = ">=1.66.0,<2.0.0" }, + { name = "openai", specifier = ">=1.66.3,<2.0.0" }, { name = "tiktoken", specifier = ">=0.7,<1" }, ] @@ -751,7 +751,7 @@ wheels = [ [[package]] name = "openai" -version = "1.66.0" +version = "1.66.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -763,9 +763,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/84/c5/3c422ca3ccc81c063955e7c20739d7f8f37fea0af865c4a60c81e6225e14/openai-1.66.0.tar.gz", hash = "sha256:8a9e672bc6eadec60a962f0b40d7d1c09050010179c919ed65322e433e2d1025", size = 396819 } +sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/f1/d52960dac9519c9de64593460826a0fe2e19159389ec97ecf3e931d2e6a3/openai-1.66.0-py3-none-any.whl", hash = "sha256:43e4a3c0c066cc5809be4e6aac456a3ebc4ec1848226ef9d1340859ac130d45a", size = 566389 }, + { url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 }, ] [[package]]
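
Usage sketch (not part of the patch itself): the integration tests above exercise the new Responses API options roughly as below. The model name, prompt, and token limit are illustrative only; the option names come from the tests in this diff.

    from langchain_openai import ChatOpenAI

    # max_tokens is renamed to max_output_tokens and reasoning_effort becomes
    # reasoning={"effort": ...} in the Responses API request payload; response
    # headers are surfaced when include_response_headers=True.
    llm = ChatOpenAI(
        model="o3-mini",
        use_responses_api=True,
        include_response_headers=True,
        reasoning_effort="low",
        max_tokens=16,  # type: ignore[call-arg]
    )

    response = llm.invoke("Hello")
    print(response.response_metadata["headers"]["content-type"])
    print(response.additional_kwargs["reasoning"])

    # When streaming, headers are attached to the first chunk's metadata and
    # survive chunk aggregation (see test_openai_response_headers).
    full = None
    for chunk in llm.stream("Hello"):
        full = chunk if full is None else full + chunk
    print(full.response_metadata["headers"]["content-type"])

Structured output with strict=True is also propagated to the Responses API text format; per test_parsed_strict, passing strict=True with a schema that violates strict-mode requirements now raises openai.BadRequestError.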