diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index c499492502d..817d34caaae 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -471,6 +471,9 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC): if "stream" in kwargs: return kwargs["stream"] + if getattr(self, "streaming", False): + return True + # Check if any streaming callback handlers have been passed in. handlers = run_manager.handlers if run_manager else [] return any(isinstance(h, _StreamingCallbackHandler) for h in handlers) diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index bb6acdfa6d8..d3f723552d7 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -21,8 +21,6 @@ from langchain_core.language_models import LanguageModelInput from langchain_core.language_models.chat_models import ( BaseChatModel, LangSmithParams, - agenerate_from_stream, - generate_from_stream, ) from langchain_core.messages import ( AIMessage, @@ -1845,14 +1843,6 @@ class ChatAnthropic(BaseChatModel): run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> ChatResult: - if self.streaming: - stream_iter = self._stream( - messages, - stop=stop, - run_manager=run_manager, - **kwargs, - ) - return generate_from_stream(stream_iter) payload = self._get_request_payload(messages, stop=stop, **kwargs) try: data = self._create(payload) @@ -1867,14 +1857,6 @@ class ChatAnthropic(BaseChatModel): run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, **kwargs: Any, ) -> ChatResult: - if self.streaming: - stream_iter = self._astream( - messages, - stop=stop, - run_manager=run_manager, - **kwargs, - ) - return await agenerate_from_stream(stream_iter) payload = self._get_request_payload(messages, stop=stop, **kwargs) try: data = await self._acreate(payload) diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index a62c04a1f70..aaf53df41aa 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -47,6 +47,12 @@ def test_initialization() -> None: assert model.anthropic_api_url == "https://api.anthropic.com" +@pytest.mark.parametrize("async_api", [True, False]) +def test_streaming_attribute_should_stream(async_api: bool) -> None: # noqa: FBT001 + llm = ChatAnthropic(model="foo", streaming=True) + assert llm._should_stream(async_api=async_api) + + def test_anthropic_client_caching() -> None: """Test that the OpenAI client is cached.""" llm1 = ChatAnthropic(model="claude-3-5-sonnet-latest") diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 3fad2400b2c..0aee066bb35 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -38,8 +38,6 @@ from langchain_core.language_models import LanguageModelInput from langchain_core.language_models.chat_models import ( BaseChatModel, LangSmithParams, - agenerate_from_stream, - generate_from_stream, ) from langchain_core.messages import ( AIMessage, @@ -1187,11 +1185,6 @@ class BaseChatOpenAI(BaseChatModel): run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> ChatResult: - if self.streaming: - stream_iter = self._stream( - messages, stop=stop, run_manager=run_manager, **kwargs - ) - return generate_from_stream(stream_iter) payload = self._get_request_payload(messages, stop=stop, **kwargs) generation_info = None raw_response = None @@ -1432,11 +1425,6 @@ class BaseChatOpenAI(BaseChatModel): run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, **kwargs: Any, ) -> ChatResult: - if self.streaming: - stream_iter = self._astream( - messages, stop=stop, run_manager=run_manager, **kwargs - ) - return await agenerate_from_stream(stream_iter) payload = self._get_request_payload(messages, stop=stop, **kwargs) generation_info = None raw_response = None diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 5af243707f6..ff61301587d 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -89,6 +89,12 @@ def test_openai_model_param() -> None: assert llm.max_tokens == 10 +@pytest.mark.parametrize("async_api", [True, False]) +def test_streaming_attribute_should_stream(async_api: bool) -> None: + llm = ChatOpenAI(model="foo", streaming=True) + assert llm._should_stream(async_api=async_api) + + def test_openai_client_caching() -> None: """Test that the OpenAI client is cached.""" llm1 = ChatOpenAI(model="gpt-4.1-mini")